Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
authorDavid S. Miller <davem@davemloft.net>
Fri, 21 Jul 2017 02:38:43 +0000 (03:38 +0100)
committerDavid S. Miller <davem@davemloft.net>
Fri, 21 Jul 2017 02:38:43 +0000 (03:38 +0100)
167 files changed:
Documentation/devicetree/bindings/net/renesas,ravb.txt
Documentation/devicetree/bindings/net/xilinx_axienet.txt [new file with mode: 0644]
Documentation/networking/ip-sysctl.txt
drivers/atm/ambassador.c
drivers/atm/eni.c
drivers/atm/firestream.c
drivers/atm/fore200e.c
drivers/atm/he.c
drivers/atm/horizon.c
drivers/atm/idt77252.c
drivers/atm/iphase.c
drivers/atm/lanai.c
drivers/atm/nicstar.c
drivers/atm/solos-pci.c
drivers/atm/zatm.c
drivers/isdn/hardware/eicon/divacapi.h
drivers/isdn/hardware/eicon/message.c
drivers/net/arcnet/arcdevice.h
drivers/net/arcnet/com20020-pci.c
drivers/net/bonding/bond_sysfs.c
drivers/net/can/at91_can.c
drivers/net/can/janz-ican3.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/dsa/mv88e6xxx/global2.c
drivers/net/dsa/mv88e6xxx/global2.h
drivers/net/dsa/mv88e6xxx/phy.c
drivers/net/dsa/mv88e6xxx/phy.h
drivers/net/dsa/mv88e6xxx/port.c
drivers/net/dsa/mv88e6xxx/port.h
drivers/net/dummy.c
drivers/net/ethernet/amd/xgbe/xgbe.h
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cadence/macb_pci.c
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/liquidio/liquidio_common.h
drivers/net/ethernet/cavium/liquidio/octeon_device.c
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/dec/tulip/tulip_core.c
drivers/net/ethernet/ec_bhf.c
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/trap.h
drivers/net/ethernet/neterion/s2io.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
drivers/net/ethernet/sfc/mcdi_port.c
drivers/net/ethernet/sun/ldmvsw.c
drivers/net/ethernet/sun/sunvnet.c
drivers/net/ethernet/sun/sunvnet_common.c
drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
drivers/net/ethernet/xilinx/xilinx_axienet.h
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ieee802154/ca8210.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/ipvlan/ipvtap.c
drivers/net/macvlan.c
drivers/net/macvtap.c
drivers/net/phy/mdio_bus.c
drivers/net/tap.c
drivers/net/tun.c
drivers/net/usb/cdc_ncm.c
drivers/net/virtio_net.c
drivers/net/wireless/cisco/airo.c
drivers/net/wireless/intel/ipw2x00/ipw2100.c
drivers/net/wireless/intel/ipw2x00/ipw2200.c
drivers/net/wireless/intel/iwlegacy/3945-mac.c
drivers/net/wireless/intel/iwlegacy/4965-mac.c
drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/filter.h
include/linux/netdev_features.h
include/linux/netdevice.h
include/linux/sctp.h
include/linux/skbuff.h
include/linux/virtio_net.h
include/net/af_unix.h
include/net/dsa.h
include/net/flow.h
include/net/flowcache.h [deleted file]
include/net/inetpeer.h
include/net/ip6_route.h
include/net/netns/xfrm.h
include/net/sctp/structs.h
include/net/tcp.h
include/net/xfrm.h
include/trace/events/xdp.h
include/uapi/linux/bpf.h
kernel/bpf/Makefile
kernel/bpf/devmap.c [new file with mode: 0644]
kernel/bpf/verifier.c
net/bluetooth/6lowpan.c
net/core/Makefile
net/core/dev.c
net/core/ethtool.c
net/core/filter.c
net/core/flow.c [deleted file]
net/core/skbuff.c
net/dsa/dsa.c
net/ipv4/af_inet.c
net/ipv4/gre_offload.c
net/ipv4/inetpeer.c
net/ipv4/ip_output.c
net/ipv4/ip_vti.c
net/ipv4/tcp_output.c
net/ipv4/tcp_recovery.c
net/ipv4/udp_offload.c
net/ipv4/xfrm4_policy.c
net/ipv6/ip6_output.c
net/ipv6/ip6_vti.c
net/ipv6/udp_offload.c
net/ipv6/xfrm6_policy.c
net/key/af_key.c
net/openvswitch/datapath.c
net/openvswitch/flow.c
net/openvswitch/flow.h
net/openvswitch/flow_table.c
net/packet/af_packet.c
net/rds/connection.c
net/rds/rds.h
net/rds/tcp.c
net/rds/tcp_connect.c
net/rds/tcp_send.c
net/rds/threads.c
net/sched/act_csum.c
net/sctp/auth.c
net/sctp/endpointola.c
net/sctp/ipv6.c
net/sctp/protocol.c
net/sctp/sm_make_chunk.c
net/sctp/sm_statefuns.c
net/unix/af_unix.c
net/xfrm/xfrm_device.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
samples/bpf/Makefile
samples/bpf/xdp_redirect_kern.c [new file with mode: 0644]
samples/bpf/xdp_redirect_map_kern.c [new file with mode: 0644]
samples/bpf/xdp_redirect_map_user.c [new file with mode: 0644]
samples/bpf/xdp_redirect_user.c [new file with mode: 0644]
security/selinux/include/xfrm.h
tools/hv/bondvf.sh
tools/testing/selftests/bpf/bpf_helpers.h
tools/testing/selftests/bpf/test_maps.c

index b519503..4717bc2 100644 (file)
@@ -4,19 +4,24 @@ This file provides information on what the device node for the Ethernet AVB
 interface contains.
 
 Required properties:
-- compatible: "renesas,etheravb-r8a7790" if the device is a part of R8A7790 SoC.
-             "renesas,etheravb-r8a7791" if the device is a part of R8A7791 SoC.
-             "renesas,etheravb-r8a7792" if the device is a part of R8A7792 SoC.
-             "renesas,etheravb-r8a7793" if the device is a part of R8A7793 SoC.
-             "renesas,etheravb-r8a7794" if the device is a part of R8A7794 SoC.
-             "renesas,etheravb-r8a7795" if the device is a part of R8A7795 SoC.
-             "renesas,etheravb-r8a7796" if the device is a part of R8A7796 SoC.
-             "renesas,etheravb-rcar-gen2" for generic R-Car Gen 2 compatible interface.
-             "renesas,etheravb-rcar-gen3" for generic R-Car Gen 3 compatible interface.
+- compatible: Must contain one or more of the following:
+      - "renesas,etheravb-r8a7743" for the R8A7743 SoC.
+      - "renesas,etheravb-r8a7790" for the R8A7790 SoC.
+      - "renesas,etheravb-r8a7791" for the R8A7791 SoC.
+      - "renesas,etheravb-r8a7792" for the R8A7792 SoC.
+      - "renesas,etheravb-r8a7793" for the R8A7793 SoC.
+      - "renesas,etheravb-r8a7794" for the R8A7794 SoC.
+      - "renesas,etheravb-rcar-gen2" as a fallback for the above
+               R-Car Gen2 and RZ/G1 devices.
 
-             When compatible with the generic version, nodes must list the
-             SoC-specific version corresponding to the platform first
-             followed by the generic version.
+      - "renesas,etheravb-r8a7795" for the R8A7795 SoC.
+      - "renesas,etheravb-r8a7796" for the R8A7796 SoC.
+      - "renesas,etheravb-rcar-gen3" as a fallback for the above
+               R-Car Gen3 devices.
+
+       When compatible with the generic version, nodes must list the
+       SoC-specific version corresponding to the platform first followed by
+       the generic version.
 
 - reg: offset and length of (1) the register block and (2) the stream buffer.
 - interrupts: A list of interrupt-specifiers, one for each entry in
diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
new file mode 100644 (file)
index 0000000..38f9ec0
--- /dev/null
@@ -0,0 +1,55 @@
+XILINX AXI ETHERNET Device Tree Bindings
+--------------------------------------------------------
+
+Also called  AXI 1G/2.5G Ethernet Subsystem, the xilinx axi ethernet IP core
+provides connectivity to an external ethernet PHY supporting different
+interfaces: MII, GMII, RGMII, SGMII, 1000BaseX. It also includes two
+segments of memory for buffering TX and RX, as well as the capability of
+offloading TX/RX checksum calculation off the processor.
+
+Management configuration is done through the AXI interface, while payload is
+sent and received through means of an AXI DMA controller. This driver
+includes the DMA driver code, so this driver is incompatible with AXI DMA
+driver.
+
+For more details about mdio please refer phy.txt file in the same directory.
+
+Required properties:
+- compatible   : Must be one of "xlnx,axi-ethernet-1.00.a",
+                 "xlnx,axi-ethernet-1.01.a", "xlnx,axi-ethernet-2.01.a"
+- reg          : Address and length of the IO space.
+- interrupts   : Should be a list of two interrupt, TX and RX.
+- phy-handle   : Should point to the external phy device.
+                 See ethernet.txt file in the same directory.
+- xlnx,rxmem   : Set to allocated memory buffer for Rx/Tx in the hardware
+
+Optional properties:
+- phy-mode     : See ethernet.txt
+- xlnx,phy-type        : Deprecated, do not use, but still accepted in preference
+                 to phy-mode.
+- xlnx,txcsum  : 0 or empty for disabling TX checksum offload,
+                 1 to enable partial TX checksum offload,
+                 2 to enable full TX checksum offload
+- xlnx,rxcsum  : Same values as xlnx,txcsum but for RX checksum offload
+
+Example:
+       axi_ethernet_eth: ethernet@40c00000 {
+               compatible = "xlnx,axi-ethernet-1.00.a";
+               device_type = "network";
+               interrupt-parent = <&microblaze_0_axi_intc>;
+               interrupts = <2 0>;
+               phy-mode = "mii";
+               reg = <0x40c00000 0x40000>;
+               xlnx,rxcsum = <0x2>;
+               xlnx,rxmem = <0x800>;
+               xlnx,txcsum = <0x2>;
+               phy-handle = <&phy0>;
+               axi_ethernetlite_0_mdio: mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       phy0: phy@0 {
+                               device_type = "ethernet-phy";
+                               reg = <1>;
+                       };
+               };
+       };
index 974ab47..f485d55 100644 (file)
@@ -1291,8 +1291,7 @@ tag - INTEGER
 xfrm4_gc_thresh - INTEGER
        The threshold at which we will start garbage collecting for IPv4
        destination cache entries.  At twice this value the system will
-       refuse new allocations. The value must be set below the flowcache
-       limit (4096 * number of online cpus) to take effect.
+       refuse new allocations.
 
 igmp_link_local_mcast_reports - BOOLEAN
        Enable IGMP reports for link local multicast groups in the
@@ -1778,8 +1777,7 @@ ratelimit - INTEGER
 xfrm6_gc_thresh - INTEGER
        The threshold at which we will start garbage collecting for IPv6
        destination cache entries.  At twice this value the system will
-       refuse new allocations. The value must be set below the flowcache
-       limit (4096 * number of online cpus) to take effect.
+       refuse new allocations.
 
 
 IPv6 Update by:
index 906705e..acf16c3 100644 (file)
@@ -2374,7 +2374,7 @@ MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
 
 /********** module entry **********/
 
-static struct pci_device_id amb_pci_tbl[] = {
+static const struct pci_device_id amb_pci_tbl[] = {
        { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 },
        { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 },
        { 0, }
index b042ec4..ce47eb1 100644 (file)
@@ -2292,7 +2292,7 @@ err_disable:
 }
 
 
-static struct pci_device_id eni_pci_tbl[] = {
+static const struct pci_device_id eni_pci_tbl[] = {
        { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_FPGA), 0 /* FPGA */ },
        { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_ASIC), 1 /* ASIC */ },
        { 0, }
index 22dcab9..6b6368a 100644 (file)
@@ -2030,7 +2030,7 @@ static void firestream_remove_one(struct pci_dev *pdev)
        func_exit ();
 }
 
-static struct pci_device_id firestream_pci_tbl[] = {
+static const struct pci_device_id firestream_pci_tbl[] = {
        { PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS50), FS_IS50},
        { PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS155), FS_IS155},
        { 0, }
index f0433ad..f8b7e86 100644 (file)
@@ -2757,7 +2757,7 @@ static void fore200e_pca_remove_one(struct pci_dev *pci_dev)
 }
 
 
-static struct pci_device_id fore200e_pca_tbl[] = {
+static const struct pci_device_id fore200e_pca_tbl[] = {
     { PCI_VENDOR_ID_FORE, PCI_DEVICE_ID_FORE_PCA200E, PCI_ANY_ID, PCI_ANY_ID,
       0, 0, (unsigned long) &fore200e_bus[0] },
     { 0, }
index 37ee21c..8f6156d 100644 (file)
@@ -2851,7 +2851,7 @@ MODULE_PARM_DESC(irq_coalesce, "use interrupt coalescing (default 1)");
 module_param(sdh, bool, 0);
 MODULE_PARM_DESC(sdh, "use SDH framing (default 0)");
 
-static struct pci_device_id he_pci_tbl[] = {
+static const struct pci_device_id he_pci_tbl[] = {
        { PCI_VDEVICE(FORE, PCI_DEVICE_ID_FORE_HE), 0 },
        { 0, }
 };
index 0f18480..7e76b35 100644 (file)
@@ -2867,7 +2867,7 @@ MODULE_PARM_DESC(max_tx_size, "maximum size of TX AAL5 frames");
 MODULE_PARM_DESC(max_rx_size, "maximum size of RX AAL5 frames");
 MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
 
-static struct pci_device_id hrz_pci_tbl[] = {
+static const struct pci_device_id hrz_pci_tbl[] = {
        { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_HORIZON, PCI_ANY_ID, PCI_ANY_ID,
          0, 0, 0 },
        { 0, }
index 60bacba..b7a168c 100644 (file)
@@ -3725,7 +3725,7 @@ err_out_disable_pdev:
        return err;
 }
 
-static struct pci_device_id idt77252_pci_tbl[] =
+static const struct pci_device_id idt77252_pci_tbl[] =
 {
        { PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77252), 0 },
        { 0, }
index a4fa6c8..fc72b76 100644 (file)
@@ -3266,7 +3266,7 @@ static void ia_remove_one(struct pci_dev *pdev)
        kfree(iadev);
 }
 
-static struct pci_device_id ia_pci_tbl[] = {
+static const struct pci_device_id ia_pci_tbl[] = {
        { PCI_VENDOR_ID_IPHASE, 0x0008, PCI_ANY_ID, PCI_ANY_ID, },
        { PCI_VENDOR_ID_IPHASE, 0x0009, PCI_ANY_ID, PCI_ANY_ID, },
        { 0,}
index 1a9bc51..2351dad 100644 (file)
@@ -2589,7 +2589,7 @@ static int lanai_init_one(struct pci_dev *pci,
        return result;
 }
 
-static struct pci_device_id lanai_pci_tbl[] = {
+static const struct pci_device_id lanai_pci_tbl[] = {
        { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAI2) },
        { PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAIHB) },
        { 0, }  /* terminal entry */
index d879f3b..9588d80 100644 (file)
@@ -253,7 +253,7 @@ static void nicstar_remove_one(struct pci_dev *pcidev)
        kfree(card);
 }
 
-static struct pci_device_id nicstar_pci_tbl[] = {
+static const struct pci_device_id nicstar_pci_tbl[] = {
        { PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77201), 0 },
        {0,}                    /* terminate list */
 };
index c8f2ca6..585984e 100644 (file)
@@ -1476,7 +1476,7 @@ static void fpga_remove(struct pci_dev *dev)
        kfree(card);
 }
 
-static struct pci_device_id fpga_pci_tbl[] = {
+static const struct pci_device_id fpga_pci_tbl[] = {
        { 0x10ee, 0x0300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
        { 0, }
 };
index 07bdd51..1ef67db 100644 (file)
@@ -1642,7 +1642,7 @@ out_free:
 
 MODULE_LICENSE("GPL");
 
-static struct pci_device_id zatm_pci_tbl[] = {
+static const struct pci_device_id zatm_pci_tbl[] = {
        { PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1221), ZATM_COPPER },
        { PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1225), 0 },
        { 0, }
index a315a29..c4868a0 100644 (file)
 
 /*#define DEBUG */
 
-
-
-
-
-
-
-
-
-
+#include <linux/types.h>
 
 #define IMPLEMENT_DTMF 1
 #define IMPLEMENT_LINE_INTERCONNECT2 1
@@ -82,8 +74,6 @@
 #define CODEC_PERMANENT    0x02
 #define ADV_VOICE          0x03
 #define MAX_CIP_TYPES      5  /* kind of CIP types for group optimization */
-#define C_IND_MASK_DWORDS  ((MAX_APPL + 32) >> 5)
-
 
 #define FAX_CONNECT_INFO_BUFFER_SIZE  256
 #define NCPI_BUFFER_SIZE              256
@@ -265,8 +255,8 @@ struct _PLCI {
        word          ncci_ring_list;
        byte          inc_dis_ncci_table[MAX_CHANNELS_PER_PLCI];
        t_std_internal_command internal_command_queue[MAX_INTERNAL_COMMAND_LEVELS];
-       dword         c_ind_mask_table[C_IND_MASK_DWORDS];
-       dword         group_optimization_mask_table[C_IND_MASK_DWORDS];
+       DECLARE_BITMAP(c_ind_mask_table, MAX_APPL);
+       DECLARE_BITMAP(group_optimization_mask_table, MAX_APPL);
        byte          RBuffer[200];
        dword         msg_in_queue[MSG_IN_QUEUE_SIZE/sizeof(dword)];
        API_SAVE      saved_msg;
index 3b11422..eadd1ed 100644 (file)
@@ -23,9 +23,7 @@
  *
  */
 
-
-
-
+#include <linux/bitmap.h>
 
 #include "platform.h"
 #include "di_defs.h"
 #include "mdm_msg.h"
 #include "divasync.h"
 
-
-
 #define FILE_ "MESSAGE.C"
 #define dprintf
 
-
-
-
-
-
-
-
-
 /*------------------------------------------------------------------*/
 /* This is options supported for all adapters that are server by    */
 /* XDI driver. Allo it is not necessary to ask it from every adapter*/
@@ -72,9 +60,6 @@ static dword diva_xdi_extended_features = 0;
 /*------------------------------------------------------------------*/
 
 static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci);
-static void set_group_ind_mask(PLCI *plci);
-static void clear_group_ind_mask_bit(PLCI *plci, word b);
-static byte test_group_ind_mask_bit(PLCI *plci, word b);
 void AutomaticLaw(DIVA_CAPI_ADAPTER *);
 word CapiRelease(word);
 word CapiRegister(word);
@@ -1087,106 +1072,6 @@ static void plci_remove(PLCI *plci)
 }
 
 /*------------------------------------------------------------------*/
-/* Application Group function helpers                               */
-/*------------------------------------------------------------------*/
-
-static void set_group_ind_mask(PLCI *plci)
-{
-       word i;
-
-       for (i = 0; i < C_IND_MASK_DWORDS; i++)
-               plci->group_optimization_mask_table[i] = 0xffffffffL;
-}
-
-static void clear_group_ind_mask_bit(PLCI *plci, word b)
-{
-       plci->group_optimization_mask_table[b >> 5] &= ~(1L << (b & 0x1f));
-}
-
-static byte test_group_ind_mask_bit(PLCI *plci, word b)
-{
-       return ((plci->group_optimization_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0);
-}
-
-/*------------------------------------------------------------------*/
-/* c_ind_mask operations for arbitrary MAX_APPL                     */
-/*------------------------------------------------------------------*/
-
-static void clear_c_ind_mask(PLCI *plci)
-{
-       word i;
-
-       for (i = 0; i < C_IND_MASK_DWORDS; i++)
-               plci->c_ind_mask_table[i] = 0;
-}
-
-static byte c_ind_mask_empty(PLCI *plci)
-{
-       word i;
-
-       i = 0;
-       while ((i < C_IND_MASK_DWORDS) && (plci->c_ind_mask_table[i] == 0))
-               i++;
-       return (i == C_IND_MASK_DWORDS);
-}
-
-static void set_c_ind_mask_bit(PLCI *plci, word b)
-{
-       plci->c_ind_mask_table[b >> 5] |= (1L << (b & 0x1f));
-}
-
-static void clear_c_ind_mask_bit(PLCI *plci, word b)
-{
-       plci->c_ind_mask_table[b >> 5] &= ~(1L << (b & 0x1f));
-}
-
-static byte test_c_ind_mask_bit(PLCI *plci, word b)
-{
-       return ((plci->c_ind_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0);
-}
-
-static void dump_c_ind_mask(PLCI *plci)
-{
-       word i, j, k;
-       dword d;
-       char *p;
-       char buf[40];
-
-       for (i = 0; i < C_IND_MASK_DWORDS; i += 4)
-       {
-               p = buf + 36;
-               *p = '\0';
-               for (j = 0; j < 4; j++)
-               {
-                       if (i + j < C_IND_MASK_DWORDS)
-                       {
-                               d = plci->c_ind_mask_table[i + j];
-                               for (k = 0; k < 8; k++)
-                               {
-                                       *(--p) = hex_asc_lo(d);
-                                       d >>= 4;
-                               }
-                       }
-                       else if (i != 0)
-                       {
-                               for (k = 0; k < 8; k++)
-                                       *(--p) = ' ';
-                       }
-                       *(--p) = ' ';
-               }
-               dbug(1, dprintf("c_ind_mask =%s", (char *) p));
-       }
-}
-
-
-
-
-
-#define dump_plcis(a)
-
-
-
-/*------------------------------------------------------------------*/
 /* translation function for each message                            */
 /*------------------------------------------------------------------*/
 
@@ -1457,13 +1342,13 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
                return 1;
        }
        else if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) {
-               clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
-               dump_c_ind_mask(plci);
+               __clear_bit(appl->Id - 1, plci->c_ind_mask_table);
+               dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
                Reject = GET_WORD(parms[0].info);
                dbug(1, dprintf("Reject=0x%x", Reject));
                if (Reject)
                {
-                       if (c_ind_mask_empty(plci))
+                       if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
                        {
                                if ((Reject & 0xff00) == 0x3400)
                                {
@@ -1553,11 +1438,8 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
                                sig_req(plci, CALL_RES, 0);
                        }
 
-                       for (i = 0; i < max_appl; i++) {
-                               if (test_c_ind_mask_bit(plci, i)) {
-                                       sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
-                               }
-                       }
+                       for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+                               sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
                }
        }
        return 1;
@@ -1584,13 +1466,10 @@ static byte disconnect_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
        {
                if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT)
                {
-                       clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
+                       __clear_bit(appl->Id - 1, plci->c_ind_mask_table);
                        plci->appl = appl;
-                       for (i = 0; i < max_appl; i++)
-                       {
-                               if (test_c_ind_mask_bit(plci, i))
-                                       sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
-                       }
+                       for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+                               sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
                        plci->State = OUTG_DIS_PENDING;
                }
                if (plci->Sig.Id && plci->appl)
@@ -1634,7 +1513,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
        {
                /* clear ind mask bit, just in case of collsion of          */
                /* DISCONNECT_IND and CONNECT_RES                           */
-               clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
+               __clear_bit(appl->Id - 1, plci->c_ind_mask_table);
                ncci_free_receive_buffers(plci, 0);
                if (plci_remove_check(plci))
                {
@@ -1642,7 +1521,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
                }
                if (plci->State == INC_DIS_PENDING
                    || plci->State == SUSPENDING) {
-                       if (c_ind_mask_empty(plci)) {
+                       if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) {
                                if (plci->State != SUSPENDING) plci->State = IDLE;
                                dbug(1, dprintf("chs=%d", plci->channels));
                                if (!plci->channels) {
@@ -3351,13 +3230,11 @@ static byte select_b_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
                                }
                                plci->State = INC_CON_CONNECTED_ALERT;
                                plci->appl = appl;
-                               clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
-                               dump_c_ind_mask(plci);
-                               for (i = 0; i < max_appl; i++) /* disconnect the other appls */
-                               {                         /* its quasi a connect        */
-                                       if (test_c_ind_mask_bit(plci, i))
-                                               sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
-                               }
+                               __clear_bit(appl->Id - 1, plci->c_ind_mask_table);
+                               dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
+                               /* disconnect the other appls its quasi a connect */
+                               for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+                                       sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
                        }
 
                        api_save_msg(msg, "s", &plci->saved_msg);
@@ -5692,19 +5569,17 @@ static void sig_ind(PLCI *plci)
                cip = find_cip(a, parms[4], parms[6]);
                cip_mask = 1L << cip;
                dbug(1, dprintf("cip=%d,cip_mask=%lx", cip, cip_mask));
-               clear_c_ind_mask(plci);
+               bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
                if (!remove_started && !a->adapter_disabled)
                {
-                       set_c_ind_mask_bit(plci, MAX_APPL);
                        group_optimization(a, plci);
-                       for (i = 0; i < max_appl; i++) {
+                       for_each_set_bit(i, plci->group_optimization_mask_table, max_appl) {
                                if (application[i].Id
                                    && (a->CIP_Mask[i] & 1 || a->CIP_Mask[i] & cip_mask)
-                                   && CPN_filter_ok(parms[0], a, i)
-                                   && test_group_ind_mask_bit(plci, i)) {
+                                   && CPN_filter_ok(parms[0], a, i)) {
                                        dbug(1, dprintf("storedcip_mask[%d]=0x%lx", i, a->CIP_Mask[i]));
-                                       set_c_ind_mask_bit(plci, i);
-                                       dump_c_ind_mask(plci);
+                                       __set_bit(i, plci->c_ind_mask_table);
+                                       dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
                                        plci->State = INC_CON_PENDING;
                                        plci->call_dir = (plci->call_dir & ~(CALL_DIR_OUT | CALL_DIR_ORIGINATE)) |
                                                CALL_DIR_IN | CALL_DIR_ANSWER;
@@ -5750,10 +5625,9 @@ static void sig_ind(PLCI *plci)
                                                      SendMultiIE(plci, Id, multi_pi_parms, PI, 0x210, true));
                                }
                        }
-                       clear_c_ind_mask_bit(plci, MAX_APPL);
-                       dump_c_ind_mask(plci);
+                       dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
                }
-               if (c_ind_mask_empty(plci)) {
+               if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) {
                        sig_req(plci, HANGUP, 0);
                        send_req(plci);
                        plci->State = IDLE;
@@ -5994,13 +5868,13 @@ static void sig_ind(PLCI *plci)
                break;
 
        case RESUME:
-               clear_c_ind_mask_bit(plci, (word)(plci->appl->Id - 1));
+               __clear_bit(plci->appl->Id - 1, plci->c_ind_mask_table);
                PUT_WORD(&resume_cau[4], GOOD);
                sendf(plci->appl, _FACILITY_I, Id, 0, "ws", (word)3, resume_cau);
                break;
 
        case SUSPEND:
-               clear_c_ind_mask(plci);
+               bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
 
                if (plci->NL.Id && !plci->nl_remove_id) {
                        mixer_remove(plci);
@@ -6037,15 +5911,12 @@ static void sig_ind(PLCI *plci)
 
                if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT)
                {
-                       for (i = 0; i < max_appl; i++)
-                       {
-                               if (test_c_ind_mask_bit(plci, i))
-                                       sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
-                       }
+                       for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+                               sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
                }
                else
                {
-                       clear_c_ind_mask(plci);
+                       bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
                }
                if (!plci->appl)
                {
@@ -6055,7 +5926,7 @@ static void sig_ind(PLCI *plci)
                                a->listen_active--;
                        }
                        plci->State = INC_DIS_PENDING;
-                       if (c_ind_mask_empty(plci))
+                       if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
                        {
                                plci->State = IDLE;
                                if (plci->NL.Id && !plci->nl_remove_id)
@@ -6341,14 +6212,10 @@ static void SendInfo(PLCI *plci, dword Id, byte **parms, byte iesent)
                            || Info_Number == DSP
                            || Info_Number == UUI)
                        {
-                               for (j = 0; j < max_appl; j++)
-                               {
-                                       if (test_c_ind_mask_bit(plci, j))
-                                       {
-                                               dbug(1, dprintf("Ovl_Ind"));
-                                               iesent = true;
-                                               sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
-                                       }
+                               for_each_set_bit(j, plci->c_ind_mask_table, max_appl) {
+                                       dbug(1, dprintf("Ovl_Ind"));
+                                       iesent = true;
+                                       sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
                                }
                        }
                }               /* all other signalling states */
@@ -6416,14 +6283,10 @@ static byte SendMultiIE(PLCI *plci, dword Id, byte **parms, byte ie_type,
                }
                else if (!plci->appl && Info_Number)
                {                                        /* overlap receiving broadcast */
-                       for (j = 0; j < max_appl; j++)
-                       {
-                               if (test_c_ind_mask_bit(plci, j))
-                               {
-                                       iesent = true;
-                                       dbug(1, dprintf("Mlt_Ovl_Ind"));
-                                       sendf(&application[j] , _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
-                               }
+                       for_each_set_bit(j, plci->c_ind_mask_table, max_appl) {
+                               iesent = true;
+                               dbug(1, dprintf("Mlt_Ovl_Ind"));
+                               sendf(&application[j] , _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
                        }
                }                                        /* all other signalling states */
                else if (Info_Number
@@ -7270,7 +7133,6 @@ static word get_plci(DIVA_CAPI_ADAPTER *a)
        word i, j;
        PLCI *plci;
 
-       dump_plcis(a);
        for (i = 0; i < a->max_plci && a->plci[i].Id; i++);
        if (i == a->max_plci) {
                dbug(1, dprintf("get_plci: out of PLCIs"));
@@ -7321,8 +7183,8 @@ static word get_plci(DIVA_CAPI_ADAPTER *a)
 
        plci->ncci_ring_list = 0;
        for (j = 0; j < MAX_CHANNELS_PER_PLCI; j++) plci->inc_dis_ncci_table[j] = 0;
-       clear_c_ind_mask(plci);
-       set_group_ind_mask(plci);
+       bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
+       bitmap_fill(plci->group_optimization_mask_table, MAX_APPL);
        plci->fax_connect_info_length = 0;
        plci->nsf_control_bits = 0;
        plci->ncpi_state = 0x00;
@@ -9373,10 +9235,10 @@ word CapiRelease(word Id)
                                        if (plci->State == INC_CON_PENDING
                                            || plci->State == INC_CON_ALERT)
                                        {
-                                               if (test_c_ind_mask_bit(plci, (word)(Id - 1)))
+                                               if (test_bit(Id - 1, plci->c_ind_mask_table))
                                                {
-                                                       clear_c_ind_mask_bit(plci, (word)(Id - 1));
-                                                       if (c_ind_mask_empty(plci))
+                                                       __clear_bit(Id - 1, plci->c_ind_mask_table);
+                                                       if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
                                                        {
                                                                sig_req(plci, HANGUP, 0);
                                                                send_req(plci);
@@ -9384,10 +9246,10 @@ word CapiRelease(word Id)
                                                        }
                                                }
                                        }
-                                       if (test_c_ind_mask_bit(plci, (word)(Id - 1)))
+                                       if (test_bit(Id - 1, plci->c_ind_mask_table))
                                        {
-                                               clear_c_ind_mask_bit(plci, (word)(Id - 1));
-                                               if (c_ind_mask_empty(plci))
+                                               __clear_bit(Id - 1, plci->c_ind_mask_table);
+                                               if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
                                                {
                                                        if (!plci->appl)
                                                        {
@@ -9452,7 +9314,7 @@ word CapiRelease(word Id)
 static word plci_remove_check(PLCI *plci)
 {
        if (!plci) return true;
-       if (!plci->NL.Id && c_ind_mask_empty(plci))
+       if (!plci->NL.Id && bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
        {
                if (plci->Sig.Id == 0xff)
                        plci->Sig.Id = 0;
@@ -14735,7 +14597,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
        word appl_number_group_type[MAX_APPL];
        PLCI *auxplci;
 
-       set_group_ind_mask(plci); /* all APPLs within this inc. call are allowed to dial in */
+       /* all APPLs within this inc. call are allowed to dial in */
+       bitmap_fill(plci->group_optimization_mask_table, MAX_APPL);
 
        if (!a->group_optimization_enabled)
        {
@@ -14771,13 +14634,12 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
                                if (a->plci[k].Id)
                                {
                                        auxplci = &a->plci[k];
-                                       if (auxplci->appl == &application[i]) /* application has a busy PLCI */
-                                       {
+                                       if (auxplci->appl == &application[i]) {
+                                               /* application has a busy PLCI */
                                                busy = true;
                                                dbug(1, dprintf("Appl 0x%x is busy", i + 1));
-                                       }
-                                       else if (test_c_ind_mask_bit(auxplci, i)) /* application has an incoming call pending */
-                                       {
+                                       } else if (test_bit(i, plci->c_ind_mask_table)) {
+                                               /* application has an incoming call pending */
                                                busy = true;
                                                dbug(1, dprintf("Appl 0x%x has inc. call pending", i + 1));
                                        }
@@ -14826,7 +14688,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
                                        if (appl_number_group_type[i] == appl_number_group_type[j])
                                        {
                                                dbug(1, dprintf("Appl 0x%x is member of group 0x%x, no call", j + 1, appl_number_group_type[j]));
-                                               clear_group_ind_mask_bit(plci, j);           /* disable call on other group members */
+                                               /* disable call on other group members */
+                                               __clear_bit(j, plci->group_optimization_mask_table);
                                                appl_number_group_type[j] = 0;       /* remove disabled group member from group list */
                                        }
                                }
@@ -14834,7 +14697,7 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
                }
                else                                                 /* application should not get a call */
                {
-                       clear_group_ind_mask_bit(plci, i);
+                       __clear_bit(i, plci->group_optimization_mask_table);
                }
        }
 
index cbb4f85..d09b2b4 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/if_arcnet.h>
 
 #ifdef __KERNEL__
-#include  <linux/irqreturn.h>
+#include <linux/interrupt.h>
 
 /*
  * RECON_THRESHOLD is the maximum number of RECON messages to receive
index 01cab95..eb7f767 100644 (file)
@@ -109,7 +109,7 @@ static struct attribute *com20020_state_attrs[] = {
        NULL,
 };
 
-static struct attribute_group com20020_state_group = {
+static const struct attribute_group com20020_state_group = {
        .name = NULL,
        .attrs = com20020_state_attrs,
 };
index 770623a..040b493 100644 (file)
@@ -759,7 +759,7 @@ static struct attribute *per_bond_attrs[] = {
        NULL,
 };
 
-static struct attribute_group bonding_group = {
+static const struct attribute_group bonding_group = {
        .name = "bonding",
        .attrs = per_bond_attrs,
 };
index 0e0df0b..f37ce0e 100644 (file)
@@ -1232,7 +1232,7 @@ static struct attribute *at91_sysfs_attrs[] = {
        NULL,
 };
 
-static struct attribute_group at91_sysfs_attr_group = {
+static const struct attribute_group at91_sysfs_attr_group = {
        .attrs = at91_sysfs_attrs,
 };
 
index 2ba1a81..12a53c8 100644 (file)
@@ -1875,7 +1875,7 @@ static struct attribute *ican3_sysfs_attrs[] = {
        NULL,
 };
 
-static struct attribute_group ican3_sysfs_attr_group = {
+static const struct attribute_group ican3_sysfs_attr_group = {
        .attrs = ican3_sysfs_attrs,
 };
 
index 5bcdd33..7fa19d4 100644 (file)
@@ -810,31 +810,40 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
        mutex_unlock(&chip->reg_lock);
 }
 
-static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
-                            struct ethtool_eee *e)
+static int mv88e6xxx_energy_detect_read(struct mv88e6xxx_chip *chip, int port,
+                                       struct ethtool_eee *eee)
 {
-       struct mv88e6xxx_chip *chip = ds->priv;
-       u16 reg;
        int err;
 
-       if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
+       if (!chip->info->ops->phy_energy_detect_read)
                return -EOPNOTSUPP;
 
-       mutex_lock(&chip->reg_lock);
-
-       err = mv88e6xxx_phy_read(chip, port, 16, &reg);
+       /* assign eee->eee_enabled and eee->tx_lpi_enabled */
+       err = chip->info->ops->phy_energy_detect_read(chip, port, eee);
        if (err)
-               goto out;
+               return err;
 
-       e->eee_enabled = !!(reg & 0x0200);
-       e->tx_lpi_enabled = !!(reg & 0x0100);
+       /* assign eee->eee_active */
+       return mv88e6xxx_port_status_eee(chip, port, eee);
+}
 
-       err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg);
-       if (err)
-               goto out;
+static int mv88e6xxx_energy_detect_write(struct mv88e6xxx_chip *chip, int port,
+                                        struct ethtool_eee *eee)
+{
+       if (!chip->info->ops->phy_energy_detect_write)
+               return -EOPNOTSUPP;
 
-       e->eee_active = !!(reg & MV88E6352_PORT_STS_EEE);
-out:
+       return chip->info->ops->phy_energy_detect_write(chip, port, eee);
+}
+
+static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
+                            struct ethtool_eee *e)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_energy_detect_read(chip, port, e);
        mutex_unlock(&chip->reg_lock);
 
        return err;
@@ -844,26 +853,10 @@ static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
                             struct phy_device *phydev, struct ethtool_eee *e)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
-       u16 reg;
        int err;
 
-       if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
-               return -EOPNOTSUPP;
-
        mutex_lock(&chip->reg_lock);
-
-       err = mv88e6xxx_phy_read(chip, port, 16, &reg);
-       if (err)
-               goto out;
-
-       reg &= ~0x0300;
-       if (e->eee_enabled)
-               reg |= 0x0200;
-       if (e->tx_lpi_enabled)
-               reg |= 0x0100;
-
-       err = mv88e6xxx_phy_write(chip, port, 16, reg);
-out:
+       err = mv88e6xxx_energy_detect_write(chip, port, e);
        mutex_unlock(&chip->reg_lock);
 
        return err;
@@ -926,6 +919,22 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
                dev_err(ds->dev, "p%d: failed to update state\n", port);
 }
 
+static int mv88e6xxx_pot_setup(struct mv88e6xxx_chip *chip)
+{
+       if (chip->info->ops->pot_clear)
+               return chip->info->ops->pot_clear(chip);
+
+       return 0;
+}
+
+static int mv88e6xxx_rsvd2cpu_setup(struct mv88e6xxx_chip *chip)
+{
+       if (chip->info->ops->mgmt_rsvd2cpu)
+               return chip->info->ops->mgmt_rsvd2cpu(chip);
+
+       return 0;
+}
+
 static int mv88e6xxx_atu_setup(struct mv88e6xxx_chip *chip)
 {
        int err;
@@ -2116,7 +2125,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
                goto unlock;
 
        /* Setup Switch Global 2 Registers */
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) {
+       if (chip->info->global2_addr) {
                err = mv88e6xxx_g2_setup(chip);
                if (err)
                        goto unlock;
@@ -2142,16 +2151,13 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
        if (err)
                goto unlock;
 
-       /* Some generations have the configuration of sending reserved
-        * management frames to the CPU in global2, others in
-        * global1. Hence it does not fit the two setup functions
-        * above.
-        */
-       if (chip->info->ops->mgmt_rsvd2cpu) {
-               err = chip->info->ops->mgmt_rsvd2cpu(chip);
-               if (err)
-                       goto unlock;
-       }
+       err = mv88e6xxx_pot_setup(chip);
+       if (err)
+               goto unlock;
+
+       err = mv88e6xxx_rsvd2cpu_setup(chip);
+       if (err)
+               goto unlock;
 
 unlock:
        mutex_unlock(&chip->reg_lock);
@@ -2385,7 +2391,8 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .ppu_enable = mv88e6185_g1_ppu_enable,
        .ppu_disable = mv88e6185_g1_ppu_disable,
        .reset = mv88e6185_g1_reset,
@@ -2408,7 +2415,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
        .stats_get_sset_count = mv88e6095_stats_get_sset_count,
        .stats_get_strings = mv88e6095_stats_get_strings,
        .stats_get_stats = mv88e6095_stats_get_stats,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
        .ppu_enable = mv88e6185_g1_ppu_enable,
        .ppu_disable = mv88e6185_g1_ppu_disable,
        .reset = mv88e6185_g1_reset,
@@ -2441,7 +2448,8 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2467,7 +2475,8 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2496,7 +2505,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
        .ppu_enable = mv88e6185_g1_ppu_enable,
        .ppu_disable = mv88e6185_g1_ppu_disable,
        .reset = mv88e6185_g1_reset,
@@ -2512,6 +2521,8 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6352_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6352_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
@@ -2533,6 +2544,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
        .set_egress_port = mv88e6390_g1_set_egress_port,
        .watchdog_ops = &mv88e6390_watchdog_ops,
        .mgmt_rsvd2cpu =  mv88e6390_g1_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2563,7 +2575,8 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2587,7 +2600,8 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2619,7 +2633,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2633,6 +2648,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6352_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6352_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
@@ -2653,7 +2670,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2686,7 +2704,8 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2700,6 +2719,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6352_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6352_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
@@ -2720,7 +2741,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2746,7 +2768,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
        .ppu_enable = mv88e6185_g1_ppu_enable,
        .ppu_disable = mv88e6185_g1_ppu_disable,
        .reset = mv88e6185_g1_reset,
@@ -2762,6 +2784,8 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6390_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6390_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
@@ -2782,6 +2806,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
        .set_egress_port = mv88e6390_g1_set_egress_port,
        .watchdog_ops = &mv88e6390_watchdog_ops,
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2796,6 +2821,8 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6390_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6390_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
@@ -2816,6 +2843,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
        .set_egress_port = mv88e6390_g1_set_egress_port,
        .watchdog_ops = &mv88e6390_watchdog_ops,
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2830,6 +2858,8 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6390_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6390_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
@@ -2850,6 +2880,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
        .set_egress_port = mv88e6390_g1_set_egress_port,
        .watchdog_ops = &mv88e6390_watchdog_ops,
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2864,6 +2895,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6352_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6352_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
@@ -2884,7 +2917,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2899,6 +2933,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6390_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6390_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
@@ -2920,6 +2956,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
        .set_egress_port = mv88e6390_g1_set_egress_port,
        .watchdog_ops = &mv88e6390_watchdog_ops,
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2934,6 +2971,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6352_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6352_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_speed = mv88e6185_port_set_speed,
@@ -2952,20 +2991,23 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
        .stats_get_stats = mv88e6320_stats_get_stats,
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6185_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
 };
 
 static const struct mv88e6xxx_ops mv88e6321_ops = {
-       /* MV88E6XXX_FAMILY_6321 */
+       /* MV88E6XXX_FAMILY_6320 */
        .irl_init_all = mv88e6352_g2_irl_init_all,
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6352_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6352_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_speed = mv88e6185_port_set_speed,
@@ -2997,6 +3039,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6352_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6352_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
@@ -3018,6 +3062,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
        .set_egress_port = mv88e6390_g1_set_egress_port,
        .watchdog_ops = &mv88e6390_watchdog_ops,
        .mgmt_rsvd2cpu =  mv88e6390_g1_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3049,7 +3094,8 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3081,7 +3127,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3095,6 +3142,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6352_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6352_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
@@ -3115,7 +3164,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .set_cpu_port = mv88e6095_g1_set_cpu_port,
        .set_egress_port = mv88e6095_g1_set_egress_port,
        .watchdog_ops = &mv88e6097_watchdog_ops,
-       .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+       .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3130,6 +3180,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6390_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6390_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
@@ -3153,6 +3205,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
        .set_egress_port = mv88e6390_g1_set_egress_port,
        .watchdog_ops = &mv88e6390_watchdog_ops,
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -3167,6 +3220,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .phy_energy_detect_read = mv88e6390_phy_energy_detect_read,
+       .phy_energy_detect_write = mv88e6390_phy_energy_detect_write,
        .port_set_link = mv88e6xxx_port_set_link,
        .port_set_duplex = mv88e6xxx_port_set_duplex,
        .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
@@ -3190,6 +3245,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
        .set_egress_port = mv88e6390_g1_set_egress_port,
        .watchdog_ops = &mv88e6390_watchdog_ops,
        .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+       .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -3206,12 +3262,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6097,
                .ops = &mv88e6085_ops,
        },
 
@@ -3224,11 +3282,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
                .atu_move_port_mask = 0xf,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6095,
                .ops = &mv88e6095_ops,
        },
 
@@ -3241,12 +3300,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6097,
                .ops = &mv88e6097_ops,
        },
 
@@ -3259,12 +3320,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6165,
                .ops = &mv88e6123_ops,
        },
 
@@ -3277,11 +3340,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
                .atu_move_port_mask = 0xf,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6185,
                .ops = &mv88e6131_ops,
        },
 
@@ -3294,11 +3358,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 3750,
                .atu_move_port_mask = 0x1f,
+               .g2_irqs = 10,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6341,
                .ops = &mv88e6141_ops,
        },
 
@@ -3311,12 +3377,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6165,
                .ops = &mv88e6161_ops,
        },
 
@@ -3329,12 +3397,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6165,
                .ops = &mv88e6165_ops,
        },
 
@@ -3347,12 +3417,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6351,
                .ops = &mv88e6171_ops,
        },
 
@@ -3365,12 +3437,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6352,
                .ops = &mv88e6172_ops,
        },
 
@@ -3383,12 +3457,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6351,
                .ops = &mv88e6175_ops,
        },
 
@@ -3401,12 +3477,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6352,
                .ops = &mv88e6176_ops,
        },
 
@@ -3419,11 +3497,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
                .atu_move_port_mask = 0xf,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6185,
                .ops = &mv88e6185_ops,
        },
 
@@ -3436,12 +3515,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .tag_protocol = DSA_TAG_PROTO_DSA,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .g2_irqs = 14,
                .pvt = true,
+               .multi_chip = true,
                .atu_move_port_mask = 0x1f,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6190_ops,
        },
 
@@ -3454,12 +3535,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .g2_irqs = 14,
                .atu_move_port_mask = 0x1f,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6190x_ops,
        },
 
@@ -3472,12 +3555,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .g2_irqs = 14,
                .atu_move_port_mask = 0x1f,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6191_ops,
        },
 
@@ -3490,12 +3575,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6352,
                .ops = &mv88e6240_ops,
        },
 
@@ -3508,12 +3595,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .g2_irqs = 14,
                .atu_move_port_mask = 0x1f,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6290_ops,
        },
 
@@ -3526,12 +3615,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6320,
                .ops = &mv88e6320_ops,
        },
 
@@ -3544,11 +3634,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 8,
                .atu_move_port_mask = 0xf,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6320,
                .ops = &mv88e6321_ops,
        },
 
@@ -3561,11 +3652,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 3750,
                .atu_move_port_mask = 0x1f,
+               .g2_irqs = 10,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6341,
                .ops = &mv88e6341_ops,
        },
 
@@ -3578,12 +3671,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6351,
                .ops = &mv88e6350_ops,
        },
 
@@ -3596,12 +3691,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6351,
                .ops = &mv88e6351_ops,
        },
 
@@ -3614,12 +3711,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 4095,
                .port_base_addr = 0x10,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 15000,
                .g1_irqs = 9,
+               .g2_irqs = 10,
                .atu_move_port_mask = 0xf,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6352,
                .ops = &mv88e6352_ops,
        },
        [MV88E6390] = {
@@ -3631,12 +3730,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .g2_irqs = 14,
                .atu_move_port_mask = 0x1f,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6390_ops,
        },
        [MV88E6390X] = {
@@ -3648,12 +3749,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .max_vid = 8191,
                .port_base_addr = 0x0,
                .global1_addr = 0x1b,
+               .global2_addr = 0x1c,
                .age_time_coeff = 3750,
                .g1_irqs = 9,
+               .g2_irqs = 14,
                .atu_move_port_mask = 0x1f,
                .pvt = true,
+               .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
-               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
                .ops = &mv88e6390x_ops,
        },
 };
@@ -3723,7 +3826,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
 {
        if (sw_addr == 0)
                chip->smi_ops = &mv88e6xxx_smi_single_chip_ops;
-       else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP))
+       else if (chip->info->multi_chip)
                chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops;
        else
                return -EINVAL;
@@ -3971,7 +4074,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
                if (err)
                        goto out;
 
-               if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) {
+               if (chip->info->g2_irqs > 0) {
                        err = mv88e6xxx_g2_irq_setup(chip);
                        if (err)
                                goto out_g1_irq;
@@ -3991,7 +4094,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 out_mdio:
        mv88e6xxx_mdios_unregister(chip);
 out_g2_irq:
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT) && chip->irq > 0)
+       if (chip->info->g2_irqs > 0 && chip->irq > 0)
                mv88e6xxx_g2_irq_free(chip);
 out_g1_irq:
        if (chip->irq > 0) {
@@ -4013,7 +4116,7 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
        mv88e6xxx_mdios_unregister(chip);
 
        if (chip->irq > 0) {
-               if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT))
+               if (chip->info->g2_irqs > 0)
                        mv88e6xxx_g2_irq_free(chip);
                mv88e6xxx_g1_irq_free(chip);
        }
index 0864440..9111e13 100644 (file)
@@ -97,133 +97,6 @@ enum mv88e6xxx_family {
        MV88E6XXX_FAMILY_6390,  /* 6190 6190X 6191 6290 6390 6390X */
 };
 
-enum mv88e6xxx_cap {
-       /* Energy Efficient Ethernet.
-        */
-       MV88E6XXX_CAP_EEE,
-
-       /* Multi-chip Addressing Mode.
-        * Some chips respond to only 2 registers of its own SMI device address
-        * when it is non-zero, and use indirect access to internal registers.
-        */
-       MV88E6XXX_CAP_SMI_CMD,          /* (0x00) SMI Command */
-       MV88E6XXX_CAP_SMI_DATA,         /* (0x01) SMI Data */
-
-       /* Switch Global (1) Registers.
-        */
-       MV88E6XXX_CAP_G1_ATU_FID,       /* (0x01) ATU FID Register */
-       MV88E6XXX_CAP_G1_VTU_FID,       /* (0x02) VTU FID Register */
-
-       /* Switch Global 2 Registers.
-        * The device contains a second set of global 16-bit registers.
-        */
-       MV88E6XXX_CAP_GLOBAL2,
-       MV88E6XXX_CAP_G2_INT,           /* (0x00) Interrupt Status */
-       MV88E6XXX_CAP_G2_MGMT_EN_2X,    /* (0x02) MGMT Enable Register 2x */
-       MV88E6XXX_CAP_G2_MGMT_EN_0X,    /* (0x03) MGMT Enable Register 0x */
-       MV88E6XXX_CAP_G2_POT,           /* (0x0f) Priority Override Table */
-
-       /* Per VLAN Spanning Tree Unit (STU).
-        * The Port State database, if present, is accessed through VTU
-        * operations and dedicated SID registers. See MV88E6352_G1_VTU_SID.
-        */
-       MV88E6XXX_CAP_STU,
-
-       /* VLAN Table Unit.
-        * The VTU is used to program 802.1Q VLANs. See MV88E6XXX_G1_VTU_OP.
-        */
-       MV88E6XXX_CAP_VTU,
-};
-
-/* Bitmask of capabilities */
-#define MV88E6XXX_FLAG_EEE             BIT_ULL(MV88E6XXX_CAP_EEE)
-
-#define MV88E6XXX_FLAG_SMI_CMD         BIT_ULL(MV88E6XXX_CAP_SMI_CMD)
-#define MV88E6XXX_FLAG_SMI_DATA                BIT_ULL(MV88E6XXX_CAP_SMI_DATA)
-
-#define MV88E6XXX_FLAG_G1_VTU_FID      BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID)
-
-#define MV88E6XXX_FLAG_GLOBAL2         BIT_ULL(MV88E6XXX_CAP_GLOBAL2)
-#define MV88E6XXX_FLAG_G2_INT          BIT_ULL(MV88E6XXX_CAP_G2_INT)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_2X   BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_0X   BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X)
-#define MV88E6XXX_FLAG_G2_POT          BIT_ULL(MV88E6XXX_CAP_G2_POT)
-
-/* Multi-chip Addressing Mode */
-#define MV88E6XXX_FLAGS_MULTI_CHIP     \
-       (MV88E6XXX_FLAG_SMI_CMD |       \
-        MV88E6XXX_FLAG_SMI_DATA)
-
-#define MV88E6XXX_FLAGS_FAMILY_6095    \
-       (MV88E6XXX_FLAG_GLOBAL2 |       \
-        MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
-        MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6097    \
-       (MV88E6XXX_FLAG_G1_VTU_FID |    \
-        MV88E6XXX_FLAG_GLOBAL2 |       \
-        MV88E6XXX_FLAG_G2_INT |        \
-        MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
-        MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
-        MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6165    \
-       (MV88E6XXX_FLAG_G1_VTU_FID |    \
-        MV88E6XXX_FLAG_GLOBAL2 |       \
-        MV88E6XXX_FLAG_G2_INT |        \
-        MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
-        MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
-        MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6185    \
-       (MV88E6XXX_FLAG_GLOBAL2 |       \
-        MV88E6XXX_FLAG_G2_INT |        \
-        MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
-        MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6320    \
-       (MV88E6XXX_FLAG_EEE |           \
-        MV88E6XXX_FLAG_GLOBAL2 |       \
-        MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
-        MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
-        MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6341    \
-       (MV88E6XXX_FLAG_EEE |           \
-        MV88E6XXX_FLAG_G1_VTU_FID |    \
-        MV88E6XXX_FLAG_GLOBAL2 |       \
-        MV88E6XXX_FLAG_G2_INT |        \
-        MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6351    \
-       (MV88E6XXX_FLAG_G1_VTU_FID |    \
-        MV88E6XXX_FLAG_GLOBAL2 |       \
-        MV88E6XXX_FLAG_G2_INT |        \
-        MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
-        MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
-        MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6352    \
-       (MV88E6XXX_FLAG_EEE |           \
-        MV88E6XXX_FLAG_G1_VTU_FID |    \
-        MV88E6XXX_FLAG_GLOBAL2 |       \
-        MV88E6XXX_FLAG_G2_INT |        \
-        MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
-        MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
-        MV88E6XXX_FLAG_G2_POT |        \
-        MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6390    \
-       (MV88E6XXX_FLAG_EEE |           \
-        MV88E6XXX_FLAG_GLOBAL2 |       \
-        MV88E6XXX_FLAG_G2_INT |        \
-        MV88E6XXX_FLAGS_MULTI_CHIP)
-
 struct mv88e6xxx_ops;
 
 struct mv88e6xxx_info {
@@ -235,11 +108,18 @@ struct mv88e6xxx_info {
        unsigned int max_vid;
        unsigned int port_base_addr;
        unsigned int global1_addr;
+       unsigned int global2_addr;
        unsigned int age_time_coeff;
        unsigned int g1_irqs;
+       unsigned int g2_irqs;
        bool pvt;
+
+       /* Multi-chip Addressing Mode.
+        * Some chips respond to only 2 registers of its own SMI device address
+        * when it is non-zero, and use indirect access to internal registers.
+        */
+       bool multi_chip;
        enum dsa_tag_protocol tag_protocol;
-       unsigned long long flags;
 
        /* Mask for FromPort and ToPort value of PortVec used in ATU Move
         * operation. 0 means that the ATU Move operation is not supported.
@@ -359,6 +239,15 @@ struct mv88e6xxx_ops {
                         struct mii_bus *bus,
                         int addr, int reg, u16 val);
 
+       /* Copper Energy Detect operations */
+       int (*phy_energy_detect_read)(struct mv88e6xxx_chip *chip, int phy,
+                                     struct ethtool_eee *eee);
+       int (*phy_energy_detect_write)(struct mv88e6xxx_chip *chip, int phy,
+                                      struct ethtool_eee *eee);
+
+       /* Priority Override Table operations */
+       int (*pot_clear)(struct mv88e6xxx_chip *chip);
+
        /* PHY Polling Unit (PPU) operations */
        int (*ppu_enable)(struct mv88e6xxx_chip *chip);
        int (*ppu_disable)(struct mv88e6xxx_chip *chip);
@@ -449,7 +338,6 @@ struct mv88e6xxx_ops {
        int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
        const struct mv88e6xxx_irq_ops *watchdog_ops;
 
-       /* Can be either in g1 or g2, so don't use a prefix */
        int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip);
 
        /* Power on/off a SERDES interface */
@@ -482,12 +370,6 @@ struct mv88e6xxx_hw_stat {
        int type;
 };
 
-static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip,
-                                unsigned long flags)
-{
-       return (chip->info->flags & flags) == flags;
-}
-
 static inline bool mv88e6xxx_has_pvt(struct mv88e6xxx_chip *chip)
 {
        return chip->info->pvt;
index 158d0f4..16f5562 100644 (file)
 
 static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
 {
-       return mv88e6xxx_read(chip, MV88E6XXX_G2, reg, val);
+       return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val);
 }
 
 static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
 {
-       return mv88e6xxx_write(chip, MV88E6XXX_G2, reg, val);
+       return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
 }
 
 static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
 {
-       return mv88e6xxx_update(chip, MV88E6XXX_G2, reg, update);
+       return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
 }
 
 static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
 {
-       return mv88e6xxx_wait(chip, MV88E6XXX_G2, reg, mask);
+       return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
+}
+
+/* Offset 0x00: Interrupt Source Register */
+
+static int mv88e6xxx_g2_int_source(struct mv88e6xxx_chip *chip, u16 *src)
+{
+       /* Read (and clear most of) the Interrupt Source bits */
+       return mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SRC, src);
+}
+
+/* Offset 0x01: Interrupt Mask Register */
+
+static int mv88e6xxx_g2_int_mask(struct mv88e6xxx_chip *chip, u16 mask)
+{
+       return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, mask);
 }
 
 /* Offset 0x02: Management Enable 2x */
+
+static int mv88e6xxx_g2_mgmt_enable_2x(struct mv88e6xxx_chip *chip, u16 en2x)
+{
+       return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, en2x);
+}
+
 /* Offset 0x03: Management Enable 0x */
 
-int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+static int mv88e6xxx_g2_mgmt_enable_0x(struct mv88e6xxx_chip *chip, u16 en0x)
+{
+       return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X, en0x);
+}
+
+/* Offset 0x05: Switch Management Register */
+
+static int mv88e6xxx_g2_switch_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip,
+                                            bool enable)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SWITCH_MGMT, &val);
+       if (err)
+               return err;
+
+       if (enable)
+               val |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU;
+       else
+               val &= ~MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU;
+
+       return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, val);
+}
+
+int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
 {
        int err;
 
        /* Consider the frames with reserved multicast destination
-        * addresses matching 01:80:c2:00:00:2x as MGMT.
+        * addresses matching 01:80:c2:00:00:0x as MGMT.
         */
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) {
-               err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, 0xffff);
-               if (err)
-                       return err;
-       }
+       err = mv88e6xxx_g2_mgmt_enable_0x(chip, 0xffff);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_switch_mgmt_rsvd2cpu(chip, true);
+}
+
+int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+       int err;
 
        /* Consider the frames with reserved multicast destination
-        * addresses matching 01:80:c2:00:00:0x as MGMT.
+        * addresses matching 01:80:c2:00:00:2x as MGMT.
         */
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X))
-               return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X,
-                                         0xffff);
+       err = mv88e6xxx_g2_mgmt_enable_2x(chip, 0xffff);
+       if (err)
+               return err;
 
-       return 0;
+       return mv88e6185_g2_mgmt_rsvd2cpu(chip);
 }
 
 /* Offset 0x06: Device Mapping Table register */
@@ -260,7 +311,7 @@ static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
        return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_PRIO_OVERRIDE, val);
 }
 
-static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
+int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
 {
        int i, err;
 
@@ -933,7 +984,7 @@ static irqreturn_t mv88e6xxx_g2_irq_thread_fn(int irq, void *dev_id)
        u16 reg;
 
        mutex_lock(&chip->reg_lock);
-       err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SOURCE, &reg);
+       err = mv88e6xxx_g2_int_source(chip, &reg);
        mutex_unlock(&chip->reg_lock);
        if (err)
                goto out;
@@ -959,8 +1010,11 @@ static void mv88e6xxx_g2_irq_bus_lock(struct irq_data *d)
 static void mv88e6xxx_g2_irq_bus_sync_unlock(struct irq_data *d)
 {
        struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
+       int err;
 
-       mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, ~chip->g2_irq.masked);
+       err = mv88e6xxx_g2_int_mask(chip, ~chip->g2_irq.masked);
+       if (err)
+               dev_err(chip->dev, "failed to mask interrupts\n");
 
        mutex_unlock(&chip->reg_lock);
 }
@@ -1063,9 +1117,6 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
         * port at the highest priority.
         */
        reg = MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI | (0x7 << 4);
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) ||
-           mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X))
-               reg |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU | 0x7;
        err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, reg);
        if (err)
                return err;
@@ -1080,12 +1131,5 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
        if (err)
                return err;
 
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
-               /* Clear the priority override table. */
-               err = mv88e6xxx_g2_clear_pot(chip);
-               if (err)
-                       return err;
-       }
-
        return 0;
 }
index 317ffd8..669f590 100644 (file)
 
 #include "chip.h"
 
-#define MV88E6XXX_G2   0x1c
-
 /* Offset 0x00: Interrupt Source Register */
-#define MV88E6XXX_G2_INT_SOURCE                        0x00
+#define MV88E6XXX_G2_INT_SRC                   0x00
+#define MV88E6XXX_G2_INT_SRC_WDOG              0x8000
+#define MV88E6XXX_G2_INT_SRC_JAM_LIMIT         0x4000
+#define MV88E6XXX_G2_INT_SRC_DUPLEX_MISMATCH   0x2000
+#define MV88E6XXX_G2_INT_SRC_WAKE_EVENT                0x1000
+#define MV88E6352_G2_INT_SRC_SERDES            0x0800
+#define MV88E6352_G2_INT_SRC_PHY               0x001f
+#define MV88E6390_G2_INT_SRC_PHY               0x07fe
+
 #define MV88E6XXX_G2_INT_SOURCE_WATCHDOG       15
 
 /* Offset 0x01: Interrupt Mask Register */
-#define MV88E6XXX_G2_INT_MASK  0x01
+#define MV88E6XXX_G2_INT_MASK                  0x01
+#define MV88E6XXX_G2_INT_MASK_WDOG             0x8000
+#define MV88E6XXX_G2_INT_MASK_JAM_LIMIT                0x4000
+#define MV88E6XXX_G2_INT_MASK_DUPLEX_MISMATCH  0x2000
+#define MV88E6XXX_G2_INT_MASK_WAKE_EVENT       0x1000
+#define MV88E6352_G2_INT_MASK_SERDES           0x0800
+#define MV88E6352_G2_INT_MASK_PHY              0x001f
+#define MV88E6390_G2_INT_MASK_PHY              0x07fe
 
 /* Offset 0x02: MGMT Enable Register 2x */
 #define MV88E6XXX_G2_MGMT_EN_2X                0x02
@@ -245,7 +258,11 @@ int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
-int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+
+int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+
+int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
 
 extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
 extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
@@ -254,7 +271,7 @@ extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
 
 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
 {
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) {
+       if (chip->info->global2_addr) {
                dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n");
                return -EOPNOTSUPP;
        }
@@ -347,7 +364,17 @@ static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip)
 {
 }
 
-static inline int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
 {
        return -EOPNOTSUPP;
 }
index 3500ac0..317ae89 100644 (file)
@@ -13,7 +13,6 @@
 
 #include <linux/mdio.h>
 #include <linux/module.h>
-#include <net/dsa.h>
 
 #include "chip.h"
 #include "phy.h"
@@ -247,3 +246,99 @@ int mv88e6xxx_phy_setup(struct mv88e6xxx_chip *chip)
 {
        return mv88e6xxx_phy_ppu_enable(chip);
 }
+
+/* Page 0, Register 16: Copper Specific Control Register 1 */
+
+int mv88e6352_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy,
+                                    struct ethtool_eee *eee)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val);
+       if (err)
+               return err;
+
+       val &= MV88E6352_PHY_CSCTL1_ENERGY_DETECT_MASK;
+
+       eee->eee_enabled = false;
+       eee->tx_lpi_enabled = false;
+
+       switch (val) {
+       case MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP:
+               eee->tx_lpi_enabled = true;
+               /* fall through... */
+       case MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV:
+               eee->eee_enabled = true;
+       }
+
+       return 0;
+}
+
+int mv88e6352_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy,
+                                     struct ethtool_eee *eee)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val);
+       if (err)
+               return err;
+
+       val &= ~MV88E6352_PHY_CSCTL1_ENERGY_DETECT_MASK;
+
+       if (eee->eee_enabled)
+               val |= MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV;
+       if (eee->tx_lpi_enabled)
+               val |= MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP;
+
+       return mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_CSCTL1, val);
+}
+
+int mv88e6390_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy,
+                                    struct ethtool_eee *eee)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val);
+       if (err)
+               return err;
+
+       val &= MV88E6390_PHY_CSCTL1_ENERGY_DETECT_MASK;
+
+       eee->eee_enabled = false;
+       eee->tx_lpi_enabled = false;
+
+       switch (val) {
+       case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_AUTO:
+       case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_SW:
+               eee->tx_lpi_enabled = true;
+               /* fall through... */
+       case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_AUTO:
+       case MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_SW:
+               eee->eee_enabled = true;
+       }
+
+       return 0;
+}
+
+int mv88e6390_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy,
+                                     struct ethtool_eee *eee)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_phy_read(chip, phy, MV88E6XXX_PHY_CSCTL1, &val);
+       if (err)
+               return err;
+
+       val &= ~MV88E6390_PHY_CSCTL1_ENERGY_DETECT_MASK;
+
+       if (eee->eee_enabled)
+               val |= MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_AUTO;
+       if (eee->tx_lpi_enabled)
+               val |= MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_AUTO;
+
+       return mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_CSCTL1, val);
+}
index 556b74a..9888027 100644 (file)
 #define MV88E6XXX_PHY_PAGE             0x16
 #define MV88E6XXX_PHY_PAGE_COPPER      0x00
 
+/* Page 0, Register 16: Copper Specific Control Register 1 */
+#define MV88E6XXX_PHY_CSCTL1                                   16
+#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_MASK                        0x0300
+#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_OFF_MASK            0x0100 /* 0x */
+#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV           0x0200
+#define MV88E6352_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP           0x0300
+#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_MASK                        0x0380
+#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_OFF_MASK            0x0180 /* 0xx */
+#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_AUTO      0x0200
+#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_RCV_SW                0x0280
+#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_AUTO      0x0300
+#define MV88E6390_PHY_CSCTL1_ENERGY_DETECT_SENSE_NLP_SW                0x0380
+
 /* PHY Registers accesses implementations */
 int mv88e6165_phy_read(struct mv88e6xxx_chip *chip, struct mii_bus *bus,
                       int addr, int reg, u16 *val);
@@ -40,4 +53,13 @@ void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_phy_destroy(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_phy_setup(struct mv88e6xxx_chip *chip);
 
+int mv88e6352_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy,
+                                    struct ethtool_eee *eee);
+int mv88e6352_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy,
+                                     struct ethtool_eee *eee);
+int mv88e6390_phy_energy_detect_read(struct mv88e6xxx_chip *chip, int phy,
+                                    struct ethtool_eee *eee);
+int mv88e6390_phy_energy_detect_write(struct mv88e6xxx_chip *chip, int phy,
+                                     struct ethtool_eee *eee);
+
 #endif /*_MV88E6XXX_PHY_H */
index a7801f6..2837a91 100644 (file)
@@ -35,6 +35,23 @@ int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
        return mv88e6xxx_write(chip, addr, reg, val);
 }
 
+/* Offset 0x00: Port Status Register */
+
+int mv88e6xxx_port_status_eee(struct mv88e6xxx_chip *chip, int port,
+                             struct ethtool_eee *eee)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &val);
+       if (err)
+               return err;
+
+       eee->eee_active = !!(val & MV88E6352_PORT_STS_EEE);
+
+       return 0;
+}
+
 /* Offset 0x01: MAC (or PCS or Physical) Control Register
  *
  * Link, Duplex and Flow Control have one force bit, one value bit.
index 8f3991b..6fcab30 100644 (file)
 /* Offset 0x13: OutFiltered Counter */
 #define MV88E6XXX_PORT_OUT_FILTERED    0x13
 
-/* Offset 0x16: LED Control */
-#define MV88E6XXX_PORT_LED_CONTROL     0x16
-
 /* Offset 0x18: IEEE Priority Mapping Table */
 #define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE                     0x18
 #define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE_UPDATE              0x8000
@@ -244,6 +241,9 @@ int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
 int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
                         u16 val);
 
+int mv88e6xxx_port_status_eee(struct mv88e6xxx_chip *chip, int port,
+                             struct ethtool_eee *eee);
+
 int mv88e6352_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port,
                                   phy_interface_t mode);
 int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port,
index d0c165d..d0a1f9c 100644 (file)
@@ -345,7 +345,7 @@ static void dummy_setup(struct net_device *dev)
        dev->flags &= ~IFF_MULTICAST;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
        dev->features   |= NETIF_F_SG | NETIF_F_FRAGLIST;
-       dev->features   |= NETIF_F_ALL_TSO | NETIF_F_UFO;
+       dev->features   |= NETIF_F_ALL_TSO;
        dev->features   |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX;
        dev->features   |= NETIF_F_GSO_ENCAP_ALL;
        dev->hw_features |= dev->features;
index 0938294..e9282c9 100644 (file)
 #include <net/dcbnl.h>
 #include <linux/completion.h>
 #include <linux/cpumask.h>
+#include <linux/interrupt.h>
 
 #define XGBE_DRV_NAME          "amd-xgbe"
 #define XGBE_DRV_VERSION       "1.0.3"
index 26d2574..6df2cad 100644 (file)
@@ -68,7 +68,7 @@
 #define GEM_MAX_TX_LEN         ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
 
 #define GEM_MTU_MIN_SIZE       ETH_MIN_MTU
-#define MACB_NETIF_LSO         (NETIF_F_TSO | NETIF_F_UFO)
+#define MACB_NETIF_LSO         NETIF_F_TSO
 
 #define MACB_WOL_HAS_MAGIC_PACKET      (0x1 << 0)
 #define MACB_WOL_ENABLED               (0x1 << 1)
index 9906fda..248a8fc 100644 (file)
@@ -128,7 +128,7 @@ static void macb_remove(struct pci_dev *pdev)
        clk_unregister(plat_data->hclk);
 }
 
-static struct pci_device_id dev_id_table[] = {
+static const struct pci_device_id dev_id_table[] = {
        { PCI_DEVICE(CDNS_VENDOR_ID, CDNS_DEVICE_ID), },
        { 0, }
 };
index ebd353b..09e2875 100644 (file)
@@ -105,6 +105,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
        "tx_total_sent",
        "tx_total_fwd",
        "tx_err_pko",
+       "tx_err_pki",
        "tx_err_link",
        "tx_err_drop",
 
@@ -826,6 +827,8 @@ lio_get_ethtool_stats(struct net_device *netdev,
        data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_total_fwd);
        /*per_core_stats[j].link_stats[i].fromhost.fw_err_pko */
        data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pko);
+       /*per_core_stats[j].link_stats[i].fromhost.fw_err_pki */
+       data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pki);
        /*per_core_stats[j].link_stats[i].fromhost.fw_err_link */
        data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_link);
        /*per_core_stats[cvmx_get_core_num()].link_stats[idx].fromhost.
@@ -1568,6 +1571,7 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev,
                tstats->fw_total_sent = rsp_tstats->fw_total_sent;
                tstats->fw_total_fwd = rsp_tstats->fw_total_fwd;
                tstats->fw_err_pko = rsp_tstats->fw_err_pko;
+               tstats->fw_err_pki = rsp_tstats->fw_err_pki;
                tstats->fw_err_link = rsp_tstats->fw_err_link;
                tstats->fw_err_drop = rsp_tstats->fw_err_drop;
                tstats->fw_tso = rsp_tstats->fw_tso;
index 51583ae..1d8fefa 100644 (file)
@@ -2544,8 +2544,8 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev,
 {
        struct octeon_droq_ops droq_ops;
        struct net_device *netdev;
-       static int cpu_id;
-       static int cpu_id_modulus;
+       int cpu_id;
+       int cpu_id_modulus;
        struct octeon_droq *droq;
        struct napi_struct *napi;
        int q, q_no, retval = 0;
index 9b24710..935ff29 100644 (file)
@@ -1663,10 +1663,10 @@ static int setup_io_queues(struct octeon_device *octeon_dev, int ifidx)
 {
        struct octeon_droq_ops droq_ops;
        struct net_device *netdev;
-       static int cpu_id_modulus;
+       int cpu_id_modulus;
        struct octeon_droq *droq;
        struct napi_struct *napi;
-       static int cpu_id;
+       int cpu_id;
        int num_tx_descs;
        struct lio *lio;
        int retval = 0;
index 231dd7f..53aaf41 100644 (file)
@@ -814,6 +814,7 @@ struct nic_tx_stats {
        u64 fw_tso;             /* number of tso requests */
        u64 fw_tso_fwd;         /* number of packets segmented in tso */
        u64 fw_tx_vxlan;
+       u64 fw_err_pki;
 };
 
 struct oct_link_stats {
index 623e28c..f10014f 100644 (file)
@@ -876,11 +876,11 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
 
        oct->num_iqs = 0;
 
-       oct->instr_queue[0] = vmalloc_node(sizeof(*oct->instr_queue[0]),
+       oct->instr_queue[0] = vzalloc_node(sizeof(*oct->instr_queue[0]),
                                numa_node);
        if (!oct->instr_queue[0])
                oct->instr_queue[0] =
-                       vmalloc(sizeof(struct octeon_instr_queue));
+                       vzalloc(sizeof(struct octeon_instr_queue));
        if (!oct->instr_queue[0])
                return 1;
        memset(oct->instr_queue[0], 0, sizeof(struct octeon_instr_queue));
@@ -923,9 +923,9 @@ int octeon_setup_output_queues(struct octeon_device *oct)
                desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn23xx_vf));
        }
        oct->num_oqs = 0;
-       oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node);
+       oct->droq[0] = vzalloc_node(sizeof(*oct->droq[0]), numa_node);
        if (!oct->droq[0])
-               oct->droq[0] = vmalloc(sizeof(*oct->droq[0]));
+               oct->droq[0] = vzalloc(sizeof(*oct->droq[0]));
        if (!oct->droq[0])
                return 1;
 
index 2e190de..f7b5d68 100644 (file)
@@ -145,6 +145,8 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct,
 
        for (i = 0; i < droq->max_count; i++) {
                pg_info = &droq->recv_buf_list[i].pg_info;
+               if (!pg_info)
+                       continue;
 
                if (pg_info->dma)
                        lio_unmap_ring(oct->pci_dev,
@@ -275,12 +277,12 @@ int octeon_init_droq(struct octeon_device *oct,
                droq->max_count);
 
        droq->recv_buf_list = (struct octeon_recv_buffer *)
-                             vmalloc_node(droq->max_count *
+                             vzalloc_node(droq->max_count *
                                                OCT_DROQ_RECVBUF_SIZE,
                                                numa_node);
        if (!droq->recv_buf_list)
                droq->recv_buf_list = (struct octeon_recv_buffer *)
-                                     vmalloc(droq->max_count *
+                                     vzalloc(droq->max_count *
                                                OCT_DROQ_RECVBUF_SIZE);
        if (!droq->recv_buf_list) {
                dev_err(&oct->pci_dev->dev, "Output queue recv buf list alloc failed\n");
index 0bc6a4f..6a01536 100644 (file)
@@ -793,7 +793,9 @@ static struct attribute *cxgb3_attrs[] = {
        NULL
 };
 
-static struct attribute_group cxgb3_attr_group = {.attrs = cxgb3_attrs };
+static const struct attribute_group cxgb3_attr_group = {
+       .attrs = cxgb3_attrs,
+};
 
 static ssize_t tm_attr_show(struct device *d,
                            char *buf, int sched)
@@ -880,7 +882,9 @@ static struct attribute *offload_attrs[] = {
        NULL
 };
 
-static struct attribute_group offload_attr_group = {.attrs = offload_attrs };
+static const struct attribute_group offload_attr_group = {
+       .attrs = offload_attrs,
+};
 
 /*
  * Sends an sk_buff to an offload queue driver
index ef4be78..1978abb 100644 (file)
@@ -338,10 +338,12 @@ struct adapter_params {
        unsigned int sf_nsec;             /* # of flash sectors */
        unsigned int sf_fw_start;         /* start of FW image in flash */
 
-       unsigned int fw_vers;
-       unsigned int bs_vers;           /* bootstrap version */
-       unsigned int tp_vers;
-       unsigned int er_vers;           /* expansion ROM version */
+       unsigned int fw_vers;             /* firmware version */
+       unsigned int bs_vers;             /* bootstrap version */
+       unsigned int tp_vers;             /* TP microcode version */
+       unsigned int er_vers;             /* expansion ROM version */
+       unsigned int scfg_vers;           /* Serial Configuration version */
+       unsigned int vpd_vers;            /* VPD Version */
        u8 api_vers[7];
 
        unsigned short mtus[NMTUS];
@@ -1407,6 +1409,10 @@ int t4_get_fw_version(struct adapter *adapter, u32 *vers);
 int t4_get_bs_version(struct adapter *adapter, u32 *vers);
 int t4_get_tp_version(struct adapter *adapter, u32 *vers);
 int t4_get_exprom_version(struct adapter *adapter, u32 *vers);
+int t4_get_scfg_version(struct adapter *adapter, u32 *vers);
+int t4_get_vpd_version(struct adapter *adapter, u32 *vers);
+int t4_get_version_info(struct adapter *adapter);
+void t4_dump_version_info(struct adapter *adapter);
 int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info,
               const u8 *fw_data, unsigned int fw_size,
               struct fw_hdr *card_fw, enum dev_state state, int *reset);
index e403fa1..fdf220a 100644 (file)
@@ -3610,11 +3610,8 @@ static int adap_init0(struct adapter *adap)
         * later reporting and B. to warn if the currently loaded firmware
         * is excessively mismatched relative to the driver.)
         */
-       t4_get_fw_version(adap, &adap->params.fw_vers);
-       t4_get_bs_version(adap, &adap->params.bs_vers);
-       t4_get_tp_version(adap, &adap->params.tp_vers);
-       t4_get_exprom_version(adap, &adap->params.er_vers);
 
+       t4_get_version_info(adap);
        ret = t4_check_fw_version(adap);
        /* If firmware is too old (not supported by driver) force an update. */
        if (ret)
@@ -4560,56 +4557,8 @@ static void cxgb4_check_pcie_caps(struct adapter *adap)
 /* Dump basic information about the adapter */
 static void print_adapter_info(struct adapter *adapter)
 {
-       /* Device information */
-       dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n",
-                adapter->params.vpd.id,
-                CHELSIO_CHIP_RELEASE(adapter->params.chip));
-       dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n",
-                adapter->params.vpd.sn, adapter->params.vpd.pn);
-
-       /* Firmware Version */
-       if (!adapter->params.fw_vers)
-               dev_warn(adapter->pdev_dev, "No firmware loaded\n");
-       else
-               dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n",
-                        FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers),
-                        FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers),
-                        FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers),
-                        FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers));
-
-       /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap
-        * Firmware, so dev_info() is more appropriate here.)
-        */
-       if (!adapter->params.bs_vers)
-               dev_info(adapter->pdev_dev, "No bootstrap loaded\n");
-       else
-               dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n",
-                        FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers),
-                        FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers),
-                        FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers),
-                        FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers));
-
-       /* TP Microcode Version */
-       if (!adapter->params.tp_vers)
-               dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n");
-       else
-               dev_info(adapter->pdev_dev,
-                        "TP Microcode version: %u.%u.%u.%u\n",
-                        FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers),
-                        FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers),
-                        FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers),
-                        FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers));
-
-       /* Expansion ROM version */
-       if (!adapter->params.er_vers)
-               dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n");
-       else
-               dev_info(adapter->pdev_dev,
-                        "Expansion ROM version: %u.%u.%u.%u\n",
-                        FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers),
-                        FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers),
-                        FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers),
-                        FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers));
+       /* Hardware/Firmware/etc. Version/Revision IDs */
+       t4_dump_version_info(adapter);
 
        /* Software/Hardware configuration */
        dev_info(adapter->pdev_dev, "Configuration: %sNIC %s, %s capable\n",
index 82bf7aa..db41b3e 100644 (file)
@@ -913,7 +913,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0xd010, 0xd03c,
                0xdfc0, 0xdfe0,
                0xe000, 0xea7c,
-               0xf000, 0x11190,
+               0xf000, 0x11110,
+               0x11118, 0x11190,
                0x19040, 0x1906c,
                0x19078, 0x19080,
                0x1908c, 0x190e4,
@@ -1439,8 +1440,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x1ff00, 0x1ff84,
                0x1ffc0, 0x1ffc8,
                0x30000, 0x30030,
-               0x30038, 0x30038,
-               0x30040, 0x30040,
                0x30100, 0x30144,
                0x30190, 0x301a0,
                0x301a8, 0x301b8,
@@ -1551,8 +1550,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x33c3c, 0x33c50,
                0x33cf0, 0x33cfc,
                0x34000, 0x34030,
-               0x34038, 0x34038,
-               0x34040, 0x34040,
                0x34100, 0x34144,
                0x34190, 0x341a0,
                0x341a8, 0x341b8,
@@ -1663,8 +1660,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x37c3c, 0x37c50,
                0x37cf0, 0x37cfc,
                0x38000, 0x38030,
-               0x38038, 0x38038,
-               0x38040, 0x38040,
                0x38100, 0x38144,
                0x38190, 0x381a0,
                0x381a8, 0x381b8,
@@ -1775,8 +1770,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x3bc3c, 0x3bc50,
                0x3bcf0, 0x3bcfc,
                0x3c000, 0x3c030,
-               0x3c038, 0x3c038,
-               0x3c040, 0x3c040,
                0x3c100, 0x3c144,
                0x3c190, 0x3c1a0,
                0x3c1a8, 0x3c1b8,
@@ -2040,12 +2033,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x1190, 0x1194,
                0x11a0, 0x11a4,
                0x11b0, 0x11b4,
-               0x11fc, 0x1258,
-               0x1280, 0x12d4,
-               0x12d9, 0x12d9,
-               0x12de, 0x12de,
-               0x12e3, 0x12e3,
-               0x12e8, 0x133c,
+               0x11fc, 0x1274,
+               0x1280, 0x133c,
                0x1800, 0x18fc,
                0x3000, 0x302c,
                0x3060, 0x30b0,
@@ -2076,6 +2065,9 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x5ea0, 0x5eb0,
                0x5ec0, 0x5ec0,
                0x5ec8, 0x5ed0,
+               0x5ee0, 0x5ee0,
+               0x5ef0, 0x5ef0,
+               0x5f00, 0x5f00,
                0x6000, 0x6020,
                0x6028, 0x6040,
                0x6058, 0x609c,
@@ -2133,6 +2125,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0xd300, 0xd31c,
                0xdfc0, 0xdfe0,
                0xe000, 0xf008,
+               0xf010, 0xf018,
+               0xf020, 0xf028,
                0x11000, 0x11014,
                0x11048, 0x1106c,
                0x11074, 0x11088,
@@ -2256,13 +2250,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x1ff00, 0x1ff84,
                0x1ffc0, 0x1ffc8,
                0x30000, 0x30030,
-               0x30038, 0x30038,
-               0x30040, 0x30040,
-               0x30048, 0x30048,
-               0x30050, 0x30050,
-               0x3005c, 0x30060,
-               0x30068, 0x30068,
-               0x30070, 0x30070,
                0x30100, 0x30168,
                0x30190, 0x301a0,
                0x301a8, 0x301b8,
@@ -2325,13 +2312,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x326a8, 0x326a8,
                0x326ec, 0x326ec,
                0x32a00, 0x32abc,
-               0x32b00, 0x32b38,
+               0x32b00, 0x32b18,
+               0x32b20, 0x32b38,
                0x32b40, 0x32b58,
                0x32b60, 0x32b78,
                0x32c00, 0x32c00,
                0x32c08, 0x32c3c,
-               0x32e00, 0x32e2c,
-               0x32f00, 0x32f2c,
                0x33000, 0x3302c,
                0x33034, 0x33050,
                0x33058, 0x33058,
@@ -2396,13 +2382,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x33c38, 0x33c50,
                0x33cf0, 0x33cfc,
                0x34000, 0x34030,
-               0x34038, 0x34038,
-               0x34040, 0x34040,
-               0x34048, 0x34048,
-               0x34050, 0x34050,
-               0x3405c, 0x34060,
-               0x34068, 0x34068,
-               0x34070, 0x34070,
                0x34100, 0x34168,
                0x34190, 0x341a0,
                0x341a8, 0x341b8,
@@ -2465,13 +2444,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x366a8, 0x366a8,
                0x366ec, 0x366ec,
                0x36a00, 0x36abc,
-               0x36b00, 0x36b38,
+               0x36b00, 0x36b18,
+               0x36b20, 0x36b38,
                0x36b40, 0x36b58,
                0x36b60, 0x36b78,
                0x36c00, 0x36c00,
                0x36c08, 0x36c3c,
-               0x36e00, 0x36e2c,
-               0x36f00, 0x36f2c,
                0x37000, 0x3702c,
                0x37034, 0x37050,
                0x37058, 0x37058,
@@ -2545,8 +2523,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x40280, 0x40280,
                0x40304, 0x40304,
                0x40330, 0x4033c,
-               0x41304, 0x413b8,
-               0x413c0, 0x413c8,
+               0x41304, 0x413c8,
                0x413d0, 0x413dc,
                0x413f0, 0x413f0,
                0x41400, 0x4140c,
@@ -3100,6 +3077,179 @@ int t4_get_exprom_version(struct adapter *adap, u32 *vers)
 }
 
 /**
+ *      t4_get_vpd_version - return the VPD version
+ *      @adapter: the adapter
+ *      @vers: where to place the version
+ *
+ *      Reads the VPD via the Firmware interface (thus this can only be called
+ *      once we're ready to issue Firmware commands).  The format of the
+ *      VPD version is adapter specific.  Returns 0 on success, an error on
+ *      failure.
+ *
+ *      Note that early versions of the Firmware didn't include the ability
+ *      to retrieve the VPD version, so we zero-out the return-value parameter
+ *      in that case to avoid leaving it with garbage in it.
+ *
+ *      Also note that the Firmware will return its cached copy of the VPD
+ *      Revision ID, not the actual Revision ID as written in the Serial
+ *      EEPROM.  This is only an issue if a new VPD has been written and the
+ *      Firmware/Chip haven't yet gone through a RESET sequence.  So it's best
+ *      to defer calling this routine till after a FW_RESET_CMD has been issued
+ *      if the Host Driver will be performing a full adapter initialization.
+ */
+int t4_get_vpd_version(struct adapter *adapter, u32 *vers)
+{
+       u32 vpdrev_param;
+       int ret;
+
+       vpdrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+                       FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_VPDREV));
+       ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+                             1, &vpdrev_param, vers);
+       if (ret)
+               *vers = 0;
+       return ret;
+}
+
+/**
+ *      t4_get_scfg_version - return the Serial Configuration version
+ *      @adapter: the adapter
+ *      @vers: where to place the version
+ *
+ *      Reads the Serial Configuration Version via the Firmware interface
+ *      (thus this can only be called once we're ready to issue Firmware
+ *      commands).  The format of the Serial Configuration version is
+ *      adapter specific.  Returns 0 on success, an error on failure.
+ *
+ *      Note that early versions of the Firmware didn't include the ability
+ *      to retrieve the Serial Configuration version, so we zero-out the
+ *      return-value parameter in that case to avoid leaving it with
+ *      garbage in it.
+ *
+ *      Also note that the Firmware will return its cached copy of the Serial
+ *      Initialization Revision ID, not the actual Revision ID as written in
+ *      the Serial EEPROM.  This is only an issue if a new VPD has been written
+ *      and the Firmware/Chip haven't yet gone through a RESET sequence.  So
+ *      it's best to defer calling this routine till after a FW_RESET_CMD has
+ *      been issued if the Host Driver will be performing a full adapter
+ *      initialization.
+ */
+int t4_get_scfg_version(struct adapter *adapter, u32 *vers)
+{
+       u32 scfgrev_param;
+       int ret;
+
+       scfgrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+                        FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_SCFGREV));
+       ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+                             1, &scfgrev_param, vers);
+       if (ret)
+               *vers = 0;
+       return ret;
+}
+
+/**
+ *      t4_get_version_info - extract various chip/firmware version information
+ *      @adapter: the adapter
+ *
+ *      Reads various chip/firmware version numbers and stores them into the
+ *      adapter Adapter Parameters structure.  If any of the efforts fails
+ *      the first failure will be returned, but all of the version numbers
+ *      will be read.
+ */
+int t4_get_version_info(struct adapter *adapter)
+{
+       int ret = 0;
+
+       #define FIRST_RET(__getvinfo) \
+       do { \
+               int __ret = __getvinfo; \
+               if (__ret && !ret) \
+                       ret = __ret; \
+       } while (0)
+
+       FIRST_RET(t4_get_fw_version(adapter, &adapter->params.fw_vers));
+       FIRST_RET(t4_get_bs_version(adapter, &adapter->params.bs_vers));
+       FIRST_RET(t4_get_tp_version(adapter, &adapter->params.tp_vers));
+       FIRST_RET(t4_get_exprom_version(adapter, &adapter->params.er_vers));
+       FIRST_RET(t4_get_scfg_version(adapter, &adapter->params.scfg_vers));
+       FIRST_RET(t4_get_vpd_version(adapter, &adapter->params.vpd_vers));
+
+       #undef FIRST_RET
+       return ret;
+}
+
+/**
+ *      t4_dump_version_info - dump all of the adapter configuration IDs
+ *      @adapter: the adapter
+ *
+ *      Dumps all of the various bits of adapter configuration version/revision
+ *      IDs information.  This is typically called at some point after
+ *      t4_get_version_info() has been called.
+ */
+void t4_dump_version_info(struct adapter *adapter)
+{
+       /* Device information */
+       dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n",
+                adapter->params.vpd.id,
+                CHELSIO_CHIP_RELEASE(adapter->params.chip));
+       dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n",
+                adapter->params.vpd.sn, adapter->params.vpd.pn);
+
+       /* Firmware Version */
+       if (!adapter->params.fw_vers)
+               dev_warn(adapter->pdev_dev, "No firmware loaded\n");
+       else
+               dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n",
+                        FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers),
+                        FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers),
+                        FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers),
+                        FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers));
+
+       /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap
+        * Firmware, so dev_info() is more appropriate here.)
+        */
+       if (!adapter->params.bs_vers)
+               dev_info(adapter->pdev_dev, "No bootstrap loaded\n");
+       else
+               dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n",
+                        FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers),
+                        FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers),
+                        FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers),
+                        FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers));
+
+       /* TP Microcode Version */
+       if (!adapter->params.tp_vers)
+               dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n");
+       else
+               dev_info(adapter->pdev_dev,
+                        "TP Microcode version: %u.%u.%u.%u\n",
+                        FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers),
+                        FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers),
+                        FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers),
+                        FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers));
+
+       /* Expansion ROM version */
+       if (!adapter->params.er_vers)
+               dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n");
+       else
+               dev_info(adapter->pdev_dev,
+                        "Expansion ROM version: %u.%u.%u.%u\n",
+                        FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers),
+                        FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers),
+                        FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers),
+                        FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers));
+
+       /* Serial Configuration version */
+       dev_info(adapter->pdev_dev, "Serial Configuration version: %#x\n",
+                adapter->params.scfg_vers);
+
+       /* VPD Version */
+       dev_info(adapter->pdev_dev, "VPD version: %#x\n",
+                adapter->params.vpd_vers);
+}
+
+/**
  *     t4_check_fw_version - check if the FW is supported with this driver
  *     @adap: the adapter
  *
index 0ebed64..ad825fb 100644 (file)
@@ -1124,6 +1124,8 @@ enum fw_params_param_dev {
        FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */
        FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17,
        FW_PARAMS_PARAM_DEV_FWCACHE = 0x18,
+       FW_PARAMS_PARAM_DEV_SCFGREV = 0x1A,
+       FW_PARAMS_PARAM_DEV_VPDREV = 0x1B,
        FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR  = 0x1C,
        FW_PARAMS_PARAM_DEV_MPSBGMAP    = 0x1E,
 };
index 17e566a..84394b4 100644 (file)
@@ -1303,7 +1303,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                0x00, 'L', 'i', 'n', 'u', 'x'
        };
        static int last_irq;
-       static int multiport_cnt;       /* For four-port boards w/one EEPROM */
        int i, irq;
        unsigned short sum;
        unsigned char *ee_data;
@@ -1557,7 +1556,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                } else if (ee_data[0] == 0xff  &&  ee_data[1] == 0xff &&
                                   ee_data[2] == 0) {
                        sa_offset = 2;          /* Grrr, damn Matrox boards. */
-                       multiport_cnt = 4;
                }
 #ifdef CONFIG_MIPS_COBALT
                if ((pdev->bus->number == 0) &&
index 4ee042c..1b79a6d 100644 (file)
@@ -73,7 +73,7 @@
 
 #define ETHERCAT_MASTER_ID     0x14
 
-static struct pci_device_id ids[] = {
+static const struct pci_device_id ids[] = {
        { PCI_DEVICE(0x15ec, 0x5000), },
        { 0, }
 };
index a8db27e..78cb20c 100644 (file)
@@ -595,7 +595,7 @@ static void hns_nic_self_test(struct net_device *ndev,
                set_bit(NIC_STATE_TESTING, &priv->state);
 
                if (if_running)
-                       (void)dev_close(ndev);
+                       dev_close(ndev);
 
                for (i = 0; i < SELF_TEST_TPYE_NUM; i++) {
                        if (!st_param[i][1])
index b45fdc9..f1bfae0 100644 (file)
@@ -1018,8 +1018,12 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
        struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx];
        struct ixgbe_ring *ring;
 
-       ixgbe_for_each_ring(ring, q_vector->tx)
-               adapter->tx_ring[ring->queue_index] = NULL;
+       ixgbe_for_each_ring(ring, q_vector->tx) {
+               if (ring_is_xdp(ring))
+                       adapter->xdp_ring[ring->queue_index] = NULL;
+               else
+                       adapter->tx_ring[ring->queue_index] = NULL;
+       }
 
        ixgbe_for_each_ring(ring, q_vector->rx)
                adapter->rx_ring[ring->queue_index] = NULL;
index f1dbdf2..0f867dc 100644 (file)
@@ -2214,7 +2214,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
                                     struct ixgbe_ring *rx_ring,
                                     struct xdp_buff *xdp)
 {
-       int result = IXGBE_XDP_PASS;
+       int err, result = IXGBE_XDP_PASS;
        struct bpf_prog *xdp_prog;
        u32 act;
 
@@ -2231,6 +2231,13 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
        case XDP_TX:
                result = ixgbe_xmit_xdp_ring(adapter, xdp);
                break;
+       case XDP_REDIRECT:
+               err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
+               if (!err)
+                       result = IXGBE_XDP_TX;
+               else
+                       result = IXGBE_XDP_CONSUMED;
+               break;
        default:
                bpf_warn_invalid_xdp_action(act);
                /* fallthrough */
@@ -2408,6 +2415,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                 */
                wmb();
                writel(ring->next_to_use, ring->tail);
+
+               xdp_do_flush_map();
        }
 
        u64_stats_update_begin(&rx_ring->syncp);
@@ -5810,6 +5819,9 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 
        usleep_range(10000, 20000);
 
+       /* synchronize_sched() needed for pending XDP buffers to drain */
+       if (adapter->xdp_ring[0])
+               synchronize_sched();
        netif_tx_stop_all_queues(netdev);
 
        /* call carrier off first to avoid false dev_watchdog timeouts */
@@ -9823,6 +9835,53 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
        }
 }
 
+static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+       struct ixgbe_adapter *adapter = netdev_priv(dev);
+       struct ixgbe_ring *ring;
+       int err;
+
+       if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
+               return -EINVAL;
+
+       /* During program transitions it's possible adapter->xdp_prog is assigned
+        * but ring has not been configured yet. In this case simply abort xmit.
+        */
+       ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL;
+       if (unlikely(!ring))
+               return -EINVAL;
+
+       err = ixgbe_xmit_xdp_ring(adapter, xdp);
+       if (err != IXGBE_XDP_TX)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void ixgbe_xdp_flush(struct net_device *dev)
+{
+       struct ixgbe_adapter *adapter = netdev_priv(dev);
+       struct ixgbe_ring *ring;
+
+       /* It's possible the device went down between xdp xmit and flush so
+        * we need to ensure device is still up.
+        */
+       if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
+               return;
+
+       ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL;
+       if (unlikely(!ring))
+               return;
+
+       /* Force memory writes to complete before letting h/w know there
+        * are new descriptors to fetch.
+        */
+       wmb();
+       writel(ring->next_to_use, ring->tail);
+
+       return;
+}
+
 static const struct net_device_ops ixgbe_netdev_ops = {
        .ndo_open               = ixgbe_open,
        .ndo_stop               = ixgbe_close,
@@ -9869,6 +9928,8 @@ static const struct net_device_ops ixgbe_netdev_ops = {
        .ndo_udp_tunnel_del     = ixgbe_del_udp_tunnel_port,
        .ndo_features_check     = ixgbe_features_check,
        .ndo_xdp                = ixgbe_xdp,
+       .ndo_xdp_xmit           = ixgbe_xdp_xmit,
+       .ndo_xdp_flush          = ixgbe_xdp_flush,
 };
 
 /**
index b3d0c2e..7e95cf5 100644 (file)
@@ -1027,7 +1027,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
        unsigned int done[MTK_MAX_DEVS];
        unsigned int bytes[MTK_MAX_DEVS];
        u32 cpu, dma;
-       static int condition;
        int total = 0, i;
 
        memset(done, 0, sizeof(done));
@@ -1051,10 +1050,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
                        mac = 1;
 
                skb = tx_buf->skb;
-               if (!skb) {
-                       condition = 1;
+               if (!skb)
                        break;
-               }
 
                if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) {
                        bytes[mac] += skb->len;
index 9807ef8..f6963b0 100644 (file)
@@ -57,6 +57,9 @@ enum mlxsw_afk_element {
        MLXSW_AFK_ELEMENT_VID,
        MLXSW_AFK_ELEMENT_PCP,
        MLXSW_AFK_ELEMENT_TCP_FLAGS,
+       MLXSW_AFK_ELEMENT_IP_TTL_,
+       MLXSW_AFK_ELEMENT_IP_ECN,
+       MLXSW_AFK_ELEMENT_IP_DSCP,
        MLXSW_AFK_ELEMENT_MAX,
 };
 
@@ -104,6 +107,9 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
        MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12),
        MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3),
        MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9),
+       MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8),
+       MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2),
+       MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6),
        MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32),
        MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32),
        MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8),
index 1bd34d9..c6c5089 100644 (file)
@@ -3679,15 +3679,17 @@ enum mlxsw_reg_htgt_trap_group {
        MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP,
-       MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP,
-       MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
        MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
 };
 
 /* reg_htgt_trap_group
@@ -3952,10 +3954,12 @@ MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2);
  */
 MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8);
 
-static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en)
+static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en,
+                                      bool ipv6_en)
 {
        MLXSW_REG_ZERO(rgcr, payload);
        mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en);
+       mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en);
 }
 
 /* RITR - Router Interface Table Register
@@ -4203,10 +4207,12 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
        MLXSW_REG_ZERO(ritr, payload);
        mlxsw_reg_ritr_enable_set(payload, enable);
        mlxsw_reg_ritr_ipv4_set(payload, 1);
+       mlxsw_reg_ritr_ipv6_set(payload, 1);
        mlxsw_reg_ritr_type_set(payload, type);
        mlxsw_reg_ritr_op_set(payload, op);
        mlxsw_reg_ritr_rif_set(payload, rif);
        mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+       mlxsw_reg_ritr_ipv6_fe_set(payload, 1);
        mlxsw_reg_ritr_lb_en_set(payload, 1);
        mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
        mlxsw_reg_ritr_mtu_set(payload, mtu);
@@ -4718,6 +4724,7 @@ MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8);
  * Access: Index
  */
 MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32);
+MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16);
 
 enum mlxsw_reg_ralue_entry_type {
        MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1,
@@ -4851,6 +4858,16 @@ static inline void mlxsw_reg_ralue_pack4(char *payload,
        mlxsw_reg_ralue_dip4_set(payload, dip);
 }
 
+static inline void mlxsw_reg_ralue_pack6(char *payload,
+                                        enum mlxsw_reg_ralxx_protocol protocol,
+                                        enum mlxsw_reg_ralue_op op,
+                                        u16 virtual_router, u8 prefix_len,
+                                        const void *dip)
+{
+       mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len);
+       mlxsw_reg_ralue_dip6_memcpy_to(payload, dip);
+}
+
 static inline void
 mlxsw_reg_ralue_act_remote_pack(char *payload,
                                enum mlxsw_reg_ralue_trap_action trap_action,
@@ -4954,6 +4971,7 @@ MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16);
  * Access: Index
  */
 MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32);
+MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16);
 
 enum mlxsw_reg_rauht_trap_action {
        MLXSW_REG_RAUHT_TRAP_ACTION_NOP,
@@ -5018,6 +5036,15 @@ static inline void mlxsw_reg_rauht_pack4(char *payload,
        mlxsw_reg_rauht_dip4_set(payload, dip);
 }
 
+static inline void mlxsw_reg_rauht_pack6(char *payload,
+                                        enum mlxsw_reg_rauht_op op, u16 rif,
+                                        const char *mac, const char *dip)
+{
+       mlxsw_reg_rauht_pack(payload, op, rif, mac);
+       mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6);
+       mlxsw_reg_rauht_dip6_memcpy_to(payload, dip);
+}
+
 /* RALEU - Router Algorithmic LPM ECMP Update Register
  * ---------------------------------------------------
  * The register enables updating the ECMP section in the action for multiple
@@ -5216,6 +5243,30 @@ MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
 MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0,
                     32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false);
 
+#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20
+
+/* reg_rauhtd_ipv6_ent_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1,
+                    MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv6_ent_rif
+ * Router interface.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
+                    16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv6_ent_dip
+ * Destination IPv6 address.
+ * Access: RO
+ */
+MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN,
+                      16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10);
+
 static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
                                                    int ent_index, u16 *p_rif,
                                                    u32 *p_dip)
@@ -5224,6 +5275,14 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
        *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index);
 }
 
+static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload,
+                                                   int rec_index, u16 *p_rif,
+                                                   char *p_dip)
+{
+       *p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index);
+       mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip);
+}
+
 /* MFCR - Management Fan Control Register
  * --------------------------------------
  * This register controls the settings of the Fan Speed PWM mechanism.
index 60bf8f2..88b668b 100644 (file)
@@ -58,6 +58,7 @@
 #include <net/tc_act/tc_mirred.h>
 #include <net/netevent.h>
 #include <net/tc_act/tc_sample.h>
+#include <net/addrconf.h>
 
 #include "spectrum.h"
 #include "pci.h"
@@ -3333,15 +3334,47 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
        MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
        MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
        MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false),
+       MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD,
+                         false),
+       MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
+                            false),
+       MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD,
+                            false),
+       MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
+                            false),
        /* L3 traps */
-       MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
-       MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
-       MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
-       MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false),
-       MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
-       MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
-       MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false),
-       MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false),
+       MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
+       MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
+                         false),
+       MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP,
+                         false),
+       MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false),
+       MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false),
+       MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false),
+       MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
+       MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false),
+       MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false),
+       MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
+                         false),
+       MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
+                         false),
+       MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
+                         false),
+       MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
+                         false),
+       MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false),
+       MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
+                         false),
+       MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false),
+       MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
+       MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
        /* PKT Sample trap */
        MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
                  false, SP_IP2ME, DISCARD),
@@ -3376,15 +3409,17 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
                        burst_size = 7;
                        break;
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
                        rate = 16 * 1024;
                        burst_size = 10;
                        break;
-               case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
-               case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
                        rate = 1024;
                        burst_size = 7;
                        break;
@@ -3433,21 +3468,23 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
                        priority = 5;
                        tc = 5;
                        break;
-               case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
                        priority = 4;
                        tc = 4;
                        break;
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
                        priority = 3;
                        tc = 3;
                        break;
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
                        priority = 2;
                        tc = 2;
                        break;
-               case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
                        priority = 1;
@@ -4357,6 +4394,10 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
        .priority = 10, /* Must be called before FIB notifier block */
 };
 
+static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
+       .notifier_call = mlxsw_sp_inet6addr_event,
+};
+
 static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
        .notifier_call = mlxsw_sp_router_netevent_event,
 };
@@ -4377,6 +4418,7 @@ static int __init mlxsw_sp_module_init(void)
 
        register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
        register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+       register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
        register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 
        err = mlxsw_core_driver_register(&mlxsw_sp_driver);
@@ -4393,6 +4435,7 @@ err_pci_driver_register:
        mlxsw_core_driver_unregister(&mlxsw_sp_driver);
 err_core_driver_register:
        unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+       unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
        unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
        return err;
@@ -4403,6 +4446,7 @@ static void __exit mlxsw_sp_module_exit(void)
        mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver);
        mlxsw_core_driver_unregister(&mlxsw_sp_driver);
        unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+       unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
        unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
 }
index 5ef98d4..e848f06 100644 (file)
@@ -384,6 +384,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
                            unsigned long event, void *ptr);
+int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
+                            unsigned long event, void *ptr);
 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
                                 struct netdev_notifier_changeupper_info *info);
 void
index 85d5001..fb80318 100644 (file)
@@ -70,6 +70,9 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
        MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6),
        MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */
 };
 
index 61a10f1..bc5173f 100644 (file)
@@ -984,6 +984,9 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
        MLXSW_AFK_ELEMENT_VID,
        MLXSW_AFK_ELEMENT_PCP,
        MLXSW_AFK_ELEMENT_TCP_FLAGS,
+       MLXSW_AFK_ELEMENT_IP_TTL_,
+       MLXSW_AFK_ELEMENT_IP_ECN,
+       MLXSW_AFK_ELEMENT_IP_DSCP,
 };
 
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = {
index 6afbe9e..bbd238e 100644 (file)
@@ -109,7 +109,6 @@ static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
 
 static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
        [MLXSW_REG_SFGC_TYPE_BROADCAST]                         = 1,
-       [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6]       = 1,
        [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP]     = 1,
        [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL]                   = 1,
        [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST]                     = 1,
@@ -117,6 +116,7 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
 
 static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
        [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4]       = 1,
+       [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6]       = 1,
 };
 
 static const int *mlxsw_sp_packet_type_sfgc_types[] = {
index 21bb2bf..400ad40 100644 (file)
@@ -212,11 +212,46 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
        return 0;
 }
 
+static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_acl_rule_info *rulei,
+                                   struct tc_cls_flower_offload *f,
+                                   u16 n_proto)
+{
+       struct flow_dissector_key_ip *key, *mask;
+
+       if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP))
+               return 0;
+
+       if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) {
+               dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n");
+               return -EINVAL;
+       }
+
+       key = skb_flow_dissector_target(f->dissector,
+                                       FLOW_DISSECTOR_KEY_IP,
+                                       f->key);
+       mask = skb_flow_dissector_target(f->dissector,
+                                        FLOW_DISSECTOR_KEY_IP,
+                                        f->mask);
+       mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_,
+                                      key->ttl, mask->ttl);
+
+       mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN,
+                                      key->tos & 0x3, mask->tos & 0x3);
+
+       mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP,
+                                      key->tos >> 6, mask->tos >> 6);
+
+       return 0;
+}
+
 static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
                                 struct net_device *dev,
                                 struct mlxsw_sp_acl_rule_info *rulei,
                                 struct tc_cls_flower_offload *f)
 {
+       u16 n_proto_mask = 0;
+       u16 n_proto_key = 0;
        u16 addr_type = 0;
        u8 ip_proto = 0;
        int err;
@@ -229,6 +264,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_PORTS) |
              BIT(FLOW_DISSECTOR_KEY_TCP) |
+             BIT(FLOW_DISSECTOR_KEY_IP) |
              BIT(FLOW_DISSECTOR_KEY_VLAN))) {
                dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n");
                return -EOPNOTSUPP;
@@ -253,8 +289,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_BASIC,
                                                  f->mask);
-               u16 n_proto_key = ntohs(key->n_proto);
-               u16 n_proto_mask = ntohs(mask->n_proto);
+               n_proto_key = ntohs(key->n_proto);
+               n_proto_mask = ntohs(mask->n_proto);
 
                if (n_proto_key == ETH_P_ALL) {
                        n_proto_key = 0;
@@ -324,6 +360,10 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
        if (err)
                return err;
 
+       err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f, n_proto_key & n_proto_mask);
+       if (err)
+               return err;
+
        return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, rulei, f->exts);
 }
 
index 383fef5..e6d629f 100644 (file)
@@ -49,6 +49,9 @@
 #include <net/ip_fib.h>
 #include <net/fib_rules.h>
 #include <net/l3mdev.h>
+#include <net/addrconf.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
 
 #include "spectrum.h"
 #include "core.h"
@@ -304,7 +307,7 @@ static struct mlxsw_sp_rif *
 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
                         const struct net_device *dev);
 
-#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE)
+#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
 
 struct mlxsw_sp_prefix_usage {
        DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
@@ -384,23 +387,23 @@ struct mlxsw_sp_fib_node {
        struct mlxsw_sp_fib_key key;
 };
 
-struct mlxsw_sp_fib_entry_params {
-       u32 tb_id;
-       u32 prio;
-       u8 tos;
-       u8 type;
-};
-
 struct mlxsw_sp_fib_entry {
        struct list_head list;
        struct mlxsw_sp_fib_node *fib_node;
        enum mlxsw_sp_fib_entry_type type;
        struct list_head nexthop_group_node;
        struct mlxsw_sp_nexthop_group *nh_group;
-       struct mlxsw_sp_fib_entry_params params;
        bool offloaded;
 };
 
+struct mlxsw_sp_fib4_entry {
+       struct mlxsw_sp_fib_entry common;
+       u32 tb_id;
+       u32 prio;
+       u8 tos;
+       u8 type;
+};
+
 enum mlxsw_sp_l3proto {
        MLXSW_SP_L3_PROTO_IPV4,
        MLXSW_SP_L3_PROTO_IPV6,
@@ -428,6 +431,7 @@ struct mlxsw_sp_vr {
        u32 tb_id; /* kernel fib table id */
        unsigned int rif_count;
        struct mlxsw_sp_fib *fib4;
+       struct mlxsw_sp_fib *fib6;
 };
 
 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -625,7 +629,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
 
 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
 {
-       return !!vr->fib4;
+       return !!vr->fib4 || !!vr->fib6;
 }
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
@@ -694,7 +698,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
        case MLXSW_SP_L3_PROTO_IPV4:
                return vr->fib4;
        case MLXSW_SP_L3_PROTO_IPV6:
-               BUG_ON(1);
+               return vr->fib6;
        }
        return NULL;
 }
@@ -703,6 +707,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
                                              u32 tb_id)
 {
        struct mlxsw_sp_vr *vr;
+       int err;
 
        vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
        if (!vr)
@@ -710,12 +715,24 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
        vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
        if (IS_ERR(vr->fib4))
                return ERR_CAST(vr->fib4);
+       vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
+       if (IS_ERR(vr->fib6)) {
+               err = PTR_ERR(vr->fib6);
+               goto err_fib6_create;
+       }
        vr->tb_id = tb_id;
        return vr;
+
+err_fib6_create:
+       mlxsw_sp_fib_destroy(vr->fib4);
+       vr->fib4 = NULL;
+       return ERR_PTR(err);
 }
 
 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
 {
+       mlxsw_sp_fib_destroy(vr->fib6);
+       vr->fib6 = NULL;
        mlxsw_sp_fib_destroy(vr->fib4);
        vr->fib4 = NULL;
 }
@@ -773,7 +790,8 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
 
 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
 {
-       if (!vr->rif_count && list_empty(&vr->fib4->node_list))
+       if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
+           list_empty(&vr->fib6->node_list))
                mlxsw_sp_vr_destroy(vr);
 }
 
@@ -929,8 +947,11 @@ mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
 static void
 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
 {
-       unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+       unsigned long interval;
 
+       interval = min_t(unsigned long,
+                        NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
+                        NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
        mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
 }
 
@@ -965,6 +986,36 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
        neigh_release(n);
 }
 
+static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+                                                  char *rauhtd_pl,
+                                                  int rec_index)
+{
+       struct net_device *dev;
+       struct neighbour *n;
+       struct in6_addr dip;
+       u16 rif;
+
+       mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
+                                        (char *) &dip);
+
+       if (!mlxsw_sp->router->rifs[rif]) {
+               dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
+               return;
+       }
+
+       dev = mlxsw_sp->router->rifs[rif]->dev;
+       n = neigh_lookup(&nd_tbl, &dip, dev);
+       if (!n) {
+               netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
+                          &dip);
+               return;
+       }
+
+       netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
+       neigh_event_send(n, NULL);
+       neigh_release(n);
+}
+
 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int rec_index)
@@ -988,6 +1039,15 @@ static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
 
 }
 
+static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+                                                  char *rauhtd_pl,
+                                                  int rec_index)
+{
+       /* One record contains one entry. */
+       mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
+                                              rec_index);
+}
+
 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
                                              char *rauhtd_pl, int rec_index)
 {
@@ -997,7 +1057,8 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
                                                       rec_index);
                break;
        case MLXSW_REG_RAUHTD_TYPE_IPV6:
-               WARN_ON_ONCE(1);
+               mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
+                                                      rec_index);
                break;
        }
 }
@@ -1022,22 +1083,20 @@ static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
        return false;
 }
 
-static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+static int
+__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
+                                      char *rauhtd_pl,
+                                      enum mlxsw_reg_rauhtd_type type)
 {
-       char *rauhtd_pl;
-       u8 num_rec;
-       int i, err;
-
-       rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
-       if (!rauhtd_pl)
-               return -ENOMEM;
+       int i, num_rec;
+       int err;
 
        /* Make sure the neighbour's netdev isn't removed in the
         * process.
         */
        rtnl_lock();
        do {
-               mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
+               mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
                err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
                                      rauhtd_pl);
                if (err) {
@@ -1051,6 +1110,27 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
        } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
        rtnl_unlock();
 
+       return err;
+}
+
+static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+{
+       enum mlxsw_reg_rauhtd_type type;
+       char *rauhtd_pl;
+       int err;
+
+       rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
+       if (!rauhtd_pl)
+               return -ENOMEM;
+
+       type = MLXSW_REG_RAUHTD_TYPE_IPV4;
+       err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
+       if (err)
+               goto out;
+
+       type = MLXSW_REG_RAUHTD_TYPE_IPV6;
+       err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
+out:
        kfree(rauhtd_pl);
        return err;
 }
@@ -1147,6 +1227,32 @@ mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void
+mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_neigh_entry *neigh_entry,
+                               enum mlxsw_reg_rauht_op op)
+{
+       struct neighbour *n = neigh_entry->key.n;
+       char rauht_pl[MLXSW_REG_RAUHT_LEN];
+       const char *dip = n->primary_key;
+
+       mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
+                             dip);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+}
+
+static bool mlxsw_sp_neigh_ipv6_ignore(struct neighbour *n)
+{
+       /* Packets with a link-local destination address are trapped
+        * after LPM lookup and never reach the neighbour table, so
+        * there is no need to program such neighbours to the device.
+        */
+       if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
+           IPV6_ADDR_LINKLOCAL)
+               return true;
+       return false;
+}
+
+static void
 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_neigh_entry *neigh_entry,
                            bool adding)
@@ -1154,11 +1260,17 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
        if (!adding && !neigh_entry->connected)
                return;
        neigh_entry->connected = adding;
-       if (neigh_entry->key.n->tbl == &arp_tbl)
+       if (neigh_entry->key.n->tbl == &arp_tbl) {
                mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
                                                mlxsw_sp_rauht_op(adding));
-       else
+       } else if (neigh_entry->key.n->tbl == &nd_tbl) {
+               if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry->key.n))
+                       return;
+               mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
+                                               mlxsw_sp_rauht_op(adding));
+       } else {
                WARN_ON_ONCE(1);
+       }
 }
 
 struct mlxsw_sp_neigh_event_work {
@@ -1227,7 +1339,7 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
                p = ptr;
 
                /* We don't care about changes in the default table. */
-               if (!p->dev || p->tbl != &arp_tbl)
+               if (!p->dev || (p->tbl != &arp_tbl && p->tbl != &nd_tbl))
                        return NOTIFY_DONE;
 
                /* We are in atomic context and can't take RTNL mutex,
@@ -1246,7 +1358,7 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
        case NETEVENT_NEIGH_UPDATE:
                n = ptr;
 
-               if (n->tbl != &arp_tbl)
+               if (n->tbl != &arp_tbl && n->tbl != &nd_tbl)
                        return NOTIFY_DONE;
 
                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
@@ -1340,6 +1452,7 @@ struct mlxsw_sp_nexthop {
                                                */
        struct rhash_head ht_node;
        struct mlxsw_sp_nexthop_key key;
+       unsigned char gw_addr[sizeof(struct in6_addr)];
        struct mlxsw_sp_rif *rif;
        u8 should_offload:1, /* set indicates this neigh is connected and
                              * should be put to KVD linear area of this group.
@@ -1360,6 +1473,7 @@ struct mlxsw_sp_nexthop_group_key {
 struct mlxsw_sp_nexthop_group {
        struct rhash_head ht_node;
        struct list_head fib_list; /* list of fib entries that use this group */
+       struct neigh_table *neigh_tbl;
        struct mlxsw_sp_nexthop_group_key key;
        u8 adj_index_valid:1,
           gateway:1; /* routes using the group use a gateway */
@@ -1677,7 +1791,6 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
                                       struct mlxsw_sp_nexthop *nh)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry;
-       struct fib_nh *fib_nh = nh->key.fib_nh;
        struct neighbour *n;
        u8 nud_state, dead;
        int err;
@@ -1690,9 +1803,10 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
         * The reference is taken either in neigh_lookup() or
         * in neigh_create() in case n is not found.
         */
-       n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
+       n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
        if (!n) {
-               n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
+               n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
+                                nh->rif->dev);
                if (IS_ERR(n))
                        return PTR_ERR(n);
                neigh_event_send(n, NULL);
@@ -1754,10 +1868,10 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
        neigh_release(n);
 }
 
-static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
-                                struct mlxsw_sp_nexthop_group *nh_grp,
-                                struct mlxsw_sp_nexthop *nh,
-                                struct fib_nh *fib_nh)
+static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_nexthop_group *nh_grp,
+                                 struct mlxsw_sp_nexthop *nh,
+                                 struct fib_nh *fib_nh)
 {
        struct net_device *dev = fib_nh->nh_dev;
        struct in_device *in_dev;
@@ -1766,6 +1880,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
 
        nh->nh_grp = nh_grp;
        nh->key.fib_nh = fib_nh;
+       memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
        err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
        if (err)
                return err;
@@ -1795,16 +1910,16 @@ err_nexthop_neigh_init:
        return err;
 }
 
-static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
-                                 struct mlxsw_sp_nexthop *nh)
+static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_nexthop *nh)
 {
        mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
        mlxsw_sp_nexthop_rif_fini(nh);
        mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
 }
 
-static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
-                                  unsigned long event, struct fib_nh *fib_nh)
+static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
+                                   unsigned long event, struct fib_nh *fib_nh)
 {
        struct mlxsw_sp_nexthop_key key;
        struct mlxsw_sp_nexthop *nh;
@@ -1849,7 +1964,7 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 }
 
 static struct mlxsw_sp_nexthop_group *
-mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 {
        struct mlxsw_sp_nexthop_group *nh_grp;
        struct mlxsw_sp_nexthop *nh;
@@ -1864,6 +1979,8 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
        if (!nh_grp)
                return ERR_PTR(-ENOMEM);
        INIT_LIST_HEAD(&nh_grp->fib_list);
+       nh_grp->neigh_tbl = &arp_tbl;
+
        nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
        nh_grp->count = fi->fib_nhs;
        nh_grp->key.fi = fi;
@@ -1871,9 +1988,9 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];
                fib_nh = &fi->fib_nh[i];
-               err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
+               err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
                if (err)
-                       goto err_nexthop_init;
+                       goto err_nexthop4_init;
        }
        err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
        if (err)
@@ -1882,10 +1999,10 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
        return nh_grp;
 
 err_nexthop_group_insert:
-err_nexthop_init:
+err_nexthop4_init:
        for (i--; i >= 0; i--) {
                nh = &nh_grp->nexthops[i];
-               mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+               mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
        }
        fib_info_put(nh_grp->key.fi);
        kfree(nh_grp);
@@ -1893,8 +2010,8 @@ err_nexthop_init:
 }
 
 static void
-mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
-                              struct mlxsw_sp_nexthop_group *nh_grp)
+mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_nexthop_group *nh_grp)
 {
        struct mlxsw_sp_nexthop *nh;
        int i;
@@ -1902,7 +2019,7 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
        mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];
-               mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+               mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
        }
        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
        WARN_ON_ONCE(nh_grp->adj_index_valid);
@@ -1910,9 +2027,9 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
        kfree(nh_grp);
 }
 
-static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
-                                     struct mlxsw_sp_fib_entry *fib_entry,
-                                     struct fib_info *fi)
+static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_fib_entry *fib_entry,
+                                      struct fib_info *fi)
 {
        struct mlxsw_sp_nexthop_group_key key;
        struct mlxsw_sp_nexthop_group *nh_grp;
@@ -1920,7 +2037,7 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
        key.fi = fi;
        nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
        if (!nh_grp) {
-               nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
+               nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
                if (IS_ERR(nh_grp))
                        return PTR_ERR(nh_grp);
        }
@@ -1929,15 +2046,25 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
        return 0;
 }
 
-static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
-                                      struct mlxsw_sp_fib_entry *fib_entry)
+static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp_fib_entry *fib_entry)
 {
        struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
 
        list_del(&fib_entry->nexthop_group_node);
        if (!list_empty(&nh_grp->fib_list))
                return;
-       mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
+       mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
+}
+
+static bool
+mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
+{
+       struct mlxsw_sp_fib4_entry *fib4_entry;
+
+       fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
+                                 common);
+       return !fib4_entry->tos;
 }
 
 static bool
@@ -1945,8 +2072,14 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
 {
        struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
 
-       if (fib_entry->params.tos)
-               return false;
+       switch (fib_entry->fib_node->fib->proto) {
+       case MLXSW_SP_L3_PROTO_IPV4:
+               if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
+                       return false;
+               break;
+       case MLXSW_SP_L3_PROTO_IPV6:
+               break;
+       }
 
        switch (fib_entry->type) {
        case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
@@ -2009,13 +2142,37 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
        }
 }
 
-static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
-                                        struct mlxsw_sp_fib_entry *fib_entry,
-                                        enum mlxsw_reg_ralue_op op)
+static void
+mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
+                             const struct mlxsw_sp_fib_entry *fib_entry,
+                             enum mlxsw_reg_ralue_op op)
 {
-       char ralue_pl[MLXSW_REG_RALUE_LEN];
        struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
-       u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
+       enum mlxsw_reg_ralxx_protocol proto;
+       u32 *p_dip;
+
+       proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
+
+       switch (fib->proto) {
+       case MLXSW_SP_L3_PROTO_IPV4:
+               p_dip = (u32 *) fib_entry->fib_node->key.addr;
+               mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
+                                     fib_entry->fib_node->key.prefix_len,
+                                     *p_dip);
+               break;
+       case MLXSW_SP_L3_PROTO_IPV6:
+               mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
+                                     fib_entry->fib_node->key.prefix_len,
+                                     fib_entry->fib_node->key.addr);
+               break;
+       }
+}
+
+static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp_fib_entry *fib_entry,
+                                       enum mlxsw_reg_ralue_op op)
+{
+       char ralue_pl[MLXSW_REG_RALUE_LEN];
        enum mlxsw_reg_ralue_trap_action trap_action;
        u16 trap_id = 0;
        u32 adjacency_index = 0;
@@ -2034,24 +2191,19 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
                trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
        }
 
-       mlxsw_reg_ralue_pack4(ralue_pl,
-                             (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
-                             fib->vr->id, fib_entry->fib_node->key.prefix_len,
-                             *p_dip);
+       mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
        mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
                                        adjacency_index, ecmp_size);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
 
-static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
-                                       struct mlxsw_sp_fib_entry *fib_entry,
-                                       enum mlxsw_reg_ralue_op op)
+static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_fib_entry *fib_entry,
+                                      enum mlxsw_reg_ralue_op op)
 {
        struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
-       struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
        enum mlxsw_reg_ralue_trap_action trap_action;
        char ralue_pl[MLXSW_REG_RALUE_LEN];
-       u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
        u16 trap_id = 0;
        u16 rif_index = 0;
 
@@ -2063,42 +2215,34 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
                trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
        }
 
-       mlxsw_reg_ralue_pack4(ralue_pl,
-                             (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
-                             fib->vr->id, fib_entry->fib_node->key.prefix_len,
-                             *p_dip);
+       mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
        mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
                                       rif_index);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
 
-static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
-                                      struct mlxsw_sp_fib_entry *fib_entry,
-                                      enum mlxsw_reg_ralue_op op)
+static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
+                                     struct mlxsw_sp_fib_entry *fib_entry,
+                                     enum mlxsw_reg_ralue_op op)
 {
-       struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
        char ralue_pl[MLXSW_REG_RALUE_LEN];
-       u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
 
-       mlxsw_reg_ralue_pack4(ralue_pl,
-                             (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
-                             fib->vr->id, fib_entry->fib_node->key.prefix_len,
-                             *p_dip);
+       mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
        mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
 
-static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
-                                 struct mlxsw_sp_fib_entry *fib_entry,
-                                 enum mlxsw_reg_ralue_op op)
+static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_fib_entry *fib_entry,
+                                  enum mlxsw_reg_ralue_op op)
 {
        switch (fib_entry->type) {
        case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
-               return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
+               return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
        case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
-               return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
+               return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
        case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
-               return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
+               return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
        }
        return -EINVAL;
 }
@@ -2107,16 +2251,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_fib_entry *fib_entry,
                                 enum mlxsw_reg_ralue_op op)
 {
-       int err = -EINVAL;
+       int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
 
-       switch (fib_entry->fib_node->fib->proto) {
-       case MLXSW_SP_L3_PROTO_IPV4:
-               err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
-               break;
-       case MLXSW_SP_L3_PROTO_IPV6:
-               return err;
-       }
        mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
+
        return err;
 }
 
@@ -2166,72 +2304,80 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
        }
 }
 
-static struct mlxsw_sp_fib_entry *
+static struct mlxsw_sp_fib4_entry *
 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
                           struct mlxsw_sp_fib_node *fib_node,
                           const struct fib_entry_notifier_info *fen_info)
 {
+       struct mlxsw_sp_fib4_entry *fib4_entry;
        struct mlxsw_sp_fib_entry *fib_entry;
        int err;
 
-       fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
-       if (!fib_entry) {
-               err = -ENOMEM;
-               goto err_fib_entry_alloc;
-       }
+       fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
+       if (!fib4_entry)
+               return ERR_PTR(-ENOMEM);
+       fib_entry = &fib4_entry->common;
 
        err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
        if (err)
                goto err_fib4_entry_type_set;
 
-       err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
+       err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
        if (err)
-               goto err_nexthop_group_get;
+               goto err_nexthop4_group_get;
 
-       fib_entry->params.prio = fen_info->fi->fib_priority;
-       fib_entry->params.tb_id = fen_info->tb_id;
-       fib_entry->params.type = fen_info->type;
-       fib_entry->params.tos = fen_info->tos;
+       fib4_entry->prio = fen_info->fi->fib_priority;
+       fib4_entry->tb_id = fen_info->tb_id;
+       fib4_entry->type = fen_info->type;
+       fib4_entry->tos = fen_info->tos;
 
        fib_entry->fib_node = fib_node;
 
-       return fib_entry;
+       return fib4_entry;
 
-err_nexthop_group_get:
+err_nexthop4_group_get:
 err_fib4_entry_type_set:
-       kfree(fib_entry);
-err_fib_entry_alloc:
+       kfree(fib4_entry);
        return ERR_PTR(err);
 }
 
 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
-                                       struct mlxsw_sp_fib_entry *fib_entry)
+                                       struct mlxsw_sp_fib4_entry *fib4_entry)
 {
-       mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
-       kfree(fib_entry);
+       mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
+       kfree(fib4_entry);
 }
 
 static struct mlxsw_sp_fib_node *
-mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
-                      const struct fib_entry_notifier_info *fen_info);
+mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
+                        size_t addr_len, unsigned char prefix_len);
 
-static struct mlxsw_sp_fib_entry *
+static struct mlxsw_sp_fib4_entry *
 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
                           const struct fib_entry_notifier_info *fen_info)
 {
-       struct mlxsw_sp_fib_entry *fib_entry;
+       struct mlxsw_sp_fib4_entry *fib4_entry;
        struct mlxsw_sp_fib_node *fib_node;
+       struct mlxsw_sp_fib *fib;
+       struct mlxsw_sp_vr *vr;
 
-       fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
-       if (IS_ERR(fib_node))
+       vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
+       if (!vr)
                return NULL;
+       fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
 
-       list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
-               if (fib_entry->params.tb_id == fen_info->tb_id &&
-                   fib_entry->params.tos == fen_info->tos &&
-                   fib_entry->params.type == fen_info->type &&
-                   fib_entry->nh_group->key.fi == fen_info->fi) {
-                       return fib_entry;
+       fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
+                                           sizeof(fen_info->dst),
+                                           fen_info->dst_len);
+       if (!fib_node)
+               return NULL;
+
+       list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
+               if (fib4_entry->tb_id == fen_info->tb_id &&
+                   fib4_entry->tos == fen_info->tos &&
+                   fib4_entry->type == fen_info->type &&
+                   fib4_entry->common.nh_group->key.fi == fen_info->fi) {
+                       return fib4_entry;
                }
        }
 
@@ -2388,28 +2534,25 @@ static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
 }
 
 static struct mlxsw_sp_fib_node *
-mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
-                      const struct fib_entry_notifier_info *fen_info)
+mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
+                     size_t addr_len, unsigned char prefix_len,
+                     enum mlxsw_sp_l3proto proto)
 {
        struct mlxsw_sp_fib_node *fib_node;
        struct mlxsw_sp_fib *fib;
        struct mlxsw_sp_vr *vr;
        int err;
 
-       vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id);
+       vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id);
        if (IS_ERR(vr))
                return ERR_CAST(vr);
-       fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
+       fib = mlxsw_sp_vr_fib(vr, proto);
 
-       fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
-                                           sizeof(fen_info->dst),
-                                           fen_info->dst_len);
+       fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
        if (fib_node)
                return fib_node;
 
-       fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst,
-                                           sizeof(fen_info->dst),
-                                           fen_info->dst_len);
+       fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
        if (!fib_node) {
                err = -ENOMEM;
                goto err_fib_node_create;
@@ -2428,8 +2571,8 @@ err_fib_node_create:
        return ERR_PTR(err);
 }
 
-static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
-                                  struct mlxsw_sp_fib_node *fib_node)
+static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_fib_node *fib_node)
 {
        struct mlxsw_sp_vr *vr = fib_node->fib->vr;
 
@@ -2440,95 +2583,100 @@ static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
        mlxsw_sp_vr_put(vr);
 }
 
-static struct mlxsw_sp_fib_entry *
+static struct mlxsw_sp_fib4_entry *
 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-                             const struct mlxsw_sp_fib_entry_params *params)
+                             const struct mlxsw_sp_fib4_entry *new4_entry)
 {
-       struct mlxsw_sp_fib_entry *fib_entry;
+       struct mlxsw_sp_fib4_entry *fib4_entry;
 
-       list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
-               if (fib_entry->params.tb_id > params->tb_id)
+       list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
+               if (fib4_entry->tb_id > new4_entry->tb_id)
                        continue;
-               if (fib_entry->params.tb_id != params->tb_id)
+               if (fib4_entry->tb_id != new4_entry->tb_id)
                        break;
-               if (fib_entry->params.tos > params->tos)
+               if (fib4_entry->tos > new4_entry->tos)
                        continue;
-               if (fib_entry->params.prio >= params->prio ||
-                   fib_entry->params.tos < params->tos)
-                       return fib_entry;
+               if (fib4_entry->prio >= new4_entry->prio ||
+                   fib4_entry->tos < new4_entry->tos)
+                       return fib4_entry;
        }
 
        return NULL;
 }
 
-static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
-                                         struct mlxsw_sp_fib_entry *new_entry)
+static int
+mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
+                              struct mlxsw_sp_fib4_entry *new4_entry)
 {
        struct mlxsw_sp_fib_node *fib_node;
 
-       if (WARN_ON(!fib_entry))
+       if (WARN_ON(!fib4_entry))
                return -EINVAL;
 
-       fib_node = fib_entry->fib_node;
-       list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
-               if (fib_entry->params.tb_id != new_entry->params.tb_id ||
-                   fib_entry->params.tos != new_entry->params.tos ||
-                   fib_entry->params.prio != new_entry->params.prio)
+       fib_node = fib4_entry->common.fib_node;
+       list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
+                                common.list) {
+               if (fib4_entry->tb_id != new4_entry->tb_id ||
+                   fib4_entry->tos != new4_entry->tos ||
+                   fib4_entry->prio != new4_entry->prio)
                        break;
        }
 
-       list_add_tail(&new_entry->list, &fib_entry->list);
+       list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
        return 0;
 }
 
 static int
-mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
-                              struct mlxsw_sp_fib_entry *new_entry,
+mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
                               bool replace, bool append)
 {
-       struct mlxsw_sp_fib_entry *fib_entry;
+       struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
+       struct mlxsw_sp_fib4_entry *fib4_entry;
 
-       fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
+       fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
 
        if (append)
-               return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
-       if (replace && WARN_ON(!fib_entry))
+               return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
+       if (replace && WARN_ON(!fib4_entry))
                return -EINVAL;
 
        /* Insert new entry before replaced one, so that we can later
         * remove the second.
         */
-       if (fib_entry) {
-               list_add_tail(&new_entry->list, &fib_entry->list);
+       if (fib4_entry) {
+               list_add_tail(&new4_entry->common.list,
+                             &fib4_entry->common.list);
        } else {
-               struct mlxsw_sp_fib_entry *last;
+               struct mlxsw_sp_fib4_entry *last;
 
-               list_for_each_entry(last, &fib_node->entry_list, list) {
-                       if (new_entry->params.tb_id > last->params.tb_id)
+               list_for_each_entry(last, &fib_node->entry_list, common.list) {
+                       if (new4_entry->tb_id > last->tb_id)
                                break;
-                       fib_entry = last;
+                       fib4_entry = last;
                }
 
-               if (fib_entry)
-                       list_add(&new_entry->list, &fib_entry->list);
+               if (fib4_entry)
+                       list_add(&new4_entry->common.list,
+                                &fib4_entry->common.list);
                else
-                       list_add(&new_entry->list, &fib_node->entry_list);
+                       list_add(&new4_entry->common.list,
+                                &fib_node->entry_list);
        }
 
        return 0;
 }
 
 static void
-mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
+mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
 {
-       list_del(&fib_entry->list);
+       list_del(&fib4_entry->common.list);
 }
 
-static int
-mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
-                            const struct mlxsw_sp_fib_node *fib_node,
-                            struct mlxsw_sp_fib_entry *fib_entry)
+static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_fib_entry *fib_entry)
 {
+       struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
+
        if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
                return 0;
 
@@ -2545,11 +2693,11 @@ mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
        return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
 }
 
-static void
-mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
-                            const struct mlxsw_sp_fib_node *fib_node,
-                            struct mlxsw_sp_fib_entry *fib_entry)
+static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp_fib_entry *fib_entry)
 {
+       struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
+
        if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
                return;
 
@@ -2567,54 +2715,50 @@ mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
-                                        struct mlxsw_sp_fib_entry *fib_entry,
+                                        struct mlxsw_sp_fib4_entry *fib4_entry,
                                         bool replace, bool append)
 {
-       struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
        int err;
 
-       err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
-                                            append);
+       err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
        if (err)
                return err;
 
-       err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
+       err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
        if (err)
-               goto err_fib4_node_entry_add;
+               goto err_fib_node_entry_add;
 
        return 0;
 
-err_fib4_node_entry_add:
-       mlxsw_sp_fib4_node_list_remove(fib_entry);
+err_fib_node_entry_add:
+       mlxsw_sp_fib4_node_list_remove(fib4_entry);
        return err;
 }
 
 static void
 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
-                               struct mlxsw_sp_fib_entry *fib_entry)
+                               struct mlxsw_sp_fib4_entry *fib4_entry)
 {
-       struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
-
-       mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
-       mlxsw_sp_fib4_node_list_remove(fib_entry);
+       mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
+       mlxsw_sp_fib4_node_list_remove(fib4_entry);
 }
 
 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
-                                       struct mlxsw_sp_fib_entry *fib_entry,
+                                       struct mlxsw_sp_fib4_entry *fib4_entry,
                                        bool replace)
 {
-       struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
-       struct mlxsw_sp_fib_entry *replaced;
+       struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
+       struct mlxsw_sp_fib4_entry *replaced;
 
        if (!replace)
                return;
 
        /* We inserted the new entry before replaced one */
-       replaced = list_next_entry(fib_entry, list);
+       replaced = list_next_entry(fib4_entry, common.list);
 
        mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
        mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
-       mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+       mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 }
 
 static int
@@ -2622,76 +2766,80 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
                         const struct fib_entry_notifier_info *fen_info,
                         bool replace, bool append)
 {
-       struct mlxsw_sp_fib_entry *fib_entry;
+       struct mlxsw_sp_fib4_entry *fib4_entry;
        struct mlxsw_sp_fib_node *fib_node;
        int err;
 
        if (mlxsw_sp->router->aborted)
                return 0;
 
-       fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
+       fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
+                                        &fen_info->dst, sizeof(fen_info->dst),
+                                        fen_info->dst_len,
+                                        MLXSW_SP_L3_PROTO_IPV4);
        if (IS_ERR(fib_node)) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
                return PTR_ERR(fib_node);
        }
 
-       fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
-       if (IS_ERR(fib_entry)) {
+       fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
+       if (IS_ERR(fib4_entry)) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
-               err = PTR_ERR(fib_entry);
+               err = PTR_ERR(fib4_entry);
                goto err_fib4_entry_create;
        }
 
-       err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
+       err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
                                            append);
        if (err) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
                goto err_fib4_node_entry_link;
        }
 
-       mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
+       mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
 
        return 0;
 
 err_fib4_node_entry_link:
-       mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
+       mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
 err_fib4_entry_create:
-       mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+       mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
        return err;
 }
 
 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
                                     struct fib_entry_notifier_info *fen_info)
 {
-       struct mlxsw_sp_fib_entry *fib_entry;
+       struct mlxsw_sp_fib4_entry *fib4_entry;
        struct mlxsw_sp_fib_node *fib_node;
 
        if (mlxsw_sp->router->aborted)
                return;
 
-       fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
-       if (WARN_ON(!fib_entry))
+       fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
+       if (WARN_ON(!fib4_entry))
                return;
-       fib_node = fib_entry->fib_node;
+       fib_node = fib4_entry->common.fib_node;
 
-       mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
-       mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
-       mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+       mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
+       mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+       mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 }
 
-static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
+static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
+                                           enum mlxsw_reg_ralxx_protocol proto,
+                                           u8 tree_id)
 {
        char ralta_pl[MLXSW_REG_RALTA_LEN];
        char ralst_pl[MLXSW_REG_RALST_LEN];
        int i, err;
 
-       mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
-                            MLXSW_SP_LPM_TREE_MIN);
+       mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
        if (err)
                return err;
 
-       mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
+       mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
        if (err)
                return err;
@@ -2704,17 +2852,14 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
                if (!mlxsw_sp_vr_is_used(vr))
                        continue;
 
-               mlxsw_reg_raltb_pack(raltb_pl, vr->id,
-                                    MLXSW_REG_RALXX_PROTOCOL_IPV4,
-                                    MLXSW_SP_LPM_TREE_MIN);
+               mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
                                      raltb_pl);
                if (err)
                        return err;
 
-               mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
-                                     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0,
-                                     0);
+               mlxsw_reg_ralue_pack(ralue_pl, proto,
+                                    MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
                mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
                                      ralue_pl);
@@ -2725,17 +2870,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
        return 0;
 }
 
+static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
+{
+       enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
+       int err;
+
+       err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
+                                              MLXSW_SP_LPM_TREE_MIN);
+       if (err)
+               return err;
+
+       proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
+       return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
+                                               MLXSW_SP_LPM_TREE_MIN + 1);
+}
+
 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_fib_node *fib_node)
 {
-       struct mlxsw_sp_fib_entry *fib_entry, *tmp;
+       struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
 
-       list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
-               bool do_break = &tmp->list == &fib_node->entry_list;
+       list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
+                                common.list) {
+               bool do_break = &tmp->common.list == &fib_node->entry_list;
 
-               mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
-               mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
-               mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+               mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
+               mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+               mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
                /* Break when entry list is empty and node was freed.
                 * Otherwise, we'll access freed memory in the next
                 * iteration.
@@ -2784,10 +2945,17 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
                if (!mlxsw_sp_vr_is_used(vr))
                        continue;
                mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
+
+               /* If virtual router was only used for IPv4, then it's no
+                * longer used.
+                */
+               if (!mlxsw_sp_vr_is_used(vr))
+                       continue;
+               mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
        }
 }
 
-static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
 {
        int err;
 
@@ -2832,7 +3000,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
                err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
                                               replace, append);
                if (err)
-                       mlxsw_sp_router_fib4_abort(mlxsw_sp);
+                       mlxsw_sp_router_fib_abort(mlxsw_sp);
                fib_info_put(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_ENTRY_DEL:
@@ -2843,13 +3011,13 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
        case FIB_EVENT_RULE_DEL:
                rule = fib_work->fr_info.rule;
                if (!fib4_rule_default(rule) && !rule->l3mdev)
-                       mlxsw_sp_router_fib4_abort(mlxsw_sp);
+                       mlxsw_sp_router_fib_abort(mlxsw_sp);
                fib_rule_put(rule);
                break;
        case FIB_EVENT_NH_ADD: /* fall through */
        case FIB_EVENT_NH_DEL:
-               mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
-                                      fib_work->fnh_info.fib_nh);
+               mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
+                                       fib_work->fnh_info.fib_nh);
                fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        }
@@ -2941,17 +3109,30 @@ static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
        mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
 }
 
-static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif,
-                                      const struct in_device *in_dev,
-                                      unsigned long event)
+static bool
+mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
+                          unsigned long event)
 {
+       struct inet6_dev *inet6_dev;
+       bool addr_list_empty = true;
+       struct in_device *idev;
+
        switch (event) {
        case NETDEV_UP:
                if (!rif)
                        return true;
                return false;
        case NETDEV_DOWN:
-               if (rif && !in_dev->ifa_list &&
+               idev = __in_dev_get_rtnl(dev);
+               if (idev && idev->ifa_list)
+                       addr_list_empty = false;
+
+               inet6_dev = __in6_dev_get(dev);
+               if (addr_list_empty && inet6_dev &&
+                   !list_empty(&inet6_dev->addr_list))
+                       addr_list_empty = false;
+
+               if (rif && addr_list_empty &&
                    !netif_is_l3_slave(rif->dev))
                        return true;
                /* It is possible we already removed the RIF ourselves
@@ -3349,7 +3530,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
                goto out;
 
        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
-       if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event))
+       if (!mlxsw_sp_rif_should_config(rif, dev, event))
                goto out;
 
        err = __mlxsw_sp_inetaddr_event(dev, event);
@@ -3357,6 +3538,61 @@ out:
        return notifier_from_errno(err);
 }
 
+struct mlxsw_sp_inet6addr_event_work {
+       struct work_struct work;
+       struct net_device *dev;
+       unsigned long event;
+};
+
+static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
+{
+       struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
+               container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
+       struct net_device *dev = inet6addr_work->dev;
+       unsigned long event = inet6addr_work->event;
+       struct mlxsw_sp *mlxsw_sp;
+       struct mlxsw_sp_rif *rif;
+
+       rtnl_lock();
+       mlxsw_sp = mlxsw_sp_lower_get(dev);
+       if (!mlxsw_sp)
+               goto out;
+
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+       if (!mlxsw_sp_rif_should_config(rif, dev, event))
+               goto out;
+
+       __mlxsw_sp_inetaddr_event(dev, event);
+out:
+       rtnl_unlock();
+       dev_put(dev);
+       kfree(inet6addr_work);
+}
+
+/* Called with rcu_read_lock() */
+int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
+                            unsigned long event, void *ptr)
+{
+       struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
+       struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
+       struct net_device *dev = if6->idev->dev;
+
+       if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
+               return NOTIFY_DONE;
+
+       inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
+       if (!inet6addr_work)
+               return NOTIFY_BAD;
+
+       INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
+       inet6addr_work->dev = dev;
+       inet6addr_work->event = event;
+       dev_hold(dev);
+       mlxsw_core_schedule_work(&inet6addr_work->work);
+
+       return NOTIFY_DONE;
+}
+
 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
                             const char *mac, int mtu)
 {
@@ -3558,6 +3794,11 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
        if (err)
                return err;
 
+       err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+                                    mlxsw_sp_router_port(mlxsw_sp), true);
+       if (err)
+               goto err_fid_mc_flood_set;
+
        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
@@ -3566,6 +3807,9 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
        return 0;
 
 err_fid_bc_flood_set:
+       mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+                              mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_mc_flood_set:
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
        return err;
 }
@@ -3577,6 +3821,8 @@ static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
 
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
+       mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+                              mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
 }
 
@@ -3607,6 +3853,11 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
        if (err)
                return err;
 
+       err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+                                    mlxsw_sp_router_port(mlxsw_sp), true);
+       if (err)
+               goto err_fid_mc_flood_set;
+
        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                                     mlxsw_sp_router_port(mlxsw_sp), true);
        if (err)
@@ -3615,6 +3866,9 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
        return 0;
 
 err_fid_bc_flood_set:
+       mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+                              mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_mc_flood_set:
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
        return err;
 }
@@ -3626,6 +3880,8 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
 
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
+       mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+                              mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
 }
 
@@ -3697,7 +3953,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
                return -EIO;
        max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
 
-       mlxsw_reg_rgcr_pack(rgcr_pl, true);
+       mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
        mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
        if (err)
@@ -3709,7 +3965,7 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 {
        char rgcr_pl[MLXSW_REG_RGCR_LEN];
 
-       mlxsw_reg_rgcr_pack(rgcr_pl, false);
+       mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
 }
 
index 12b5ed5..6165239 100644 (file)
@@ -61,11 +61,32 @@ enum {
        MLXSW_TRAP_ID_MTUERROR = 0x52,
        MLXSW_TRAP_ID_TTLERROR = 0x53,
        MLXSW_TRAP_ID_LBERROR = 0x54,
-       MLXSW_TRAP_ID_OSPF = 0x55,
+       MLXSW_TRAP_ID_IPV4_OSPF = 0x55,
        MLXSW_TRAP_ID_IP2ME = 0x5F,
+       MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60,
+       MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61,
+       MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62,
+       MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63,
+       MLXSW_TRAP_ID_IPV6_OSPF = 0x64,
+       MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65,
+       MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66,
+       MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67,
+       MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68,
+       MLXSW_TRAP_ID_IPV6_DHCP = 0x69,
+       MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
        MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
-       MLXSW_TRAP_ID_BGP_IPV4 = 0x88,
+       MLXSW_TRAP_ID_IPV4_BGP = 0x88,
+       MLXSW_TRAP_ID_IPV6_BGP = 0x89,
+       MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,
+       MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B,
+       MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C,
+       MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D,
+       MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E,
        MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
+       MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
+       MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
+       MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
+       MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
        MLXSW_TRAP_ID_ACL0 = 0x1C0,
 
        MLXSW_TRAP_ID_MAX = 0x1FF
index fd2ec36..462eda9 100644 (file)
@@ -42,8 +42,6 @@
  *     aggregated as a single large packet
  * napi: This parameter used to enable/disable NAPI (polling Rx)
  *     Possible values '1' for enable and '0' for disable. Default is '1'
- * ufo: This parameter used to enable/disable UDP Fragmentation Offload(UFO)
- *      Possible values '1' for enable and '0' for disable. Default is '0'
  * vlan_tag_strip: This can be used to enable or disable vlan stripping.
  *                 Possible values '1' for enable , '0' for disable.
  *                 Default is '2' - which means disable in promisc mode
@@ -453,7 +451,6 @@ S2IO_PARM_INT(lro_max_pkts, 0xFFFF);
 S2IO_PARM_INT(indicate_max_pkts, 0);
 
 S2IO_PARM_INT(napi, 1);
-S2IO_PARM_INT(ufo, 0);
 S2IO_PARM_INT(vlan_tag_strip, NO_STRIP_IN_PROMISC);
 
 static unsigned int tx_fifo_len[MAX_TX_FIFOS] =
@@ -4128,32 +4125,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        frg_len = skb_headlen(skb);
-       if (offload_type == SKB_GSO_UDP) {
-               int ufo_size;
-
-               ufo_size = s2io_udp_mss(skb);
-               ufo_size &= ~7;
-               txdp->Control_1 |= TXD_UFO_EN;
-               txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
-               txdp->Control_1 |= TXD_BUFFER0_SIZE(8);
-#ifdef __BIG_ENDIAN
-               /* both variants do cpu_to_be64(be32_to_cpu(...)) */
-               fifo->ufo_in_band_v[put_off] =
-                       (__force u64)skb_shinfo(skb)->ip6_frag_id;
-#else
-               fifo->ufo_in_band_v[put_off] =
-                       (__force u64)skb_shinfo(skb)->ip6_frag_id << 32;
-#endif
-               txdp->Host_Control = (unsigned long)fifo->ufo_in_band_v;
-               txdp->Buffer_Pointer = pci_map_single(sp->pdev,
-                                                     fifo->ufo_in_band_v,
-                                                     sizeof(u64),
-                                                     PCI_DMA_TODEVICE);
-               if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer))
-                       goto pci_map_failed;
-               txdp++;
-       }
-
        txdp->Buffer_Pointer = pci_map_single(sp->pdev, skb->data,
                                              frg_len, PCI_DMA_TODEVICE);
        if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer))
@@ -4161,8 +4132,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
 
        txdp->Host_Control = (unsigned long)skb;
        txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
-       if (offload_type == SKB_GSO_UDP)
-               txdp->Control_1 |= TXD_UFO_EN;
 
        frg_cnt = skb_shinfo(skb)->nr_frags;
        /* For fragmented SKB. */
@@ -4177,14 +4146,9 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
                                                             skb_frag_size(frag),
                                                             DMA_TO_DEVICE);
                txdp->Control_1 = TXD_BUFFER0_SIZE(skb_frag_size(frag));
-               if (offload_type == SKB_GSO_UDP)
-                       txdp->Control_1 |= TXD_UFO_EN;
        }
        txdp->Control_1 |= TXD_GATHER_CODE_LAST;
 
-       if (offload_type == SKB_GSO_UDP)
-               frg_cnt++; /* as Txd0 was used for inband header */
-
        tx_fifo = mac_control->tx_FIFO_start[queue];
        val64 = fifo->list_info[put_off].list_phy_addr;
        writeq(val64, &tx_fifo->TxDL_Pointer);
@@ -7910,11 +7874,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
                NETIF_F_RXCSUM | NETIF_F_LRO;
        dev->features |= dev->hw_features |
                NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
-       if (sp->device_type & XFRAME_II_DEVICE) {
-               dev->hw_features |= NETIF_F_UFO;
-               if (ufo)
-                       dev->features |= NETIF_F_UFO;
-       }
        if (sp->high_dma_flag == true)
                dev->features |= NETIF_F_HIGHDMA;
        dev->watchdog_timeo = WATCH_DOG_TIMEOUT;
@@ -8147,10 +8106,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 
        DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
                  dev->name);
-       if (ufo)
-               DBG_PRINT(ERR_DBG,
-                         "%s: UDP Fragmentation Offload(UFO) enabled\n",
-                         dev->name);
        /* Initialize device name */
        snprintf(sp->name, sizeof(sp->name), "%s Neterion %s", dev->name,
                 sp->product_name);
index 0844b7c..afa10a1 100644 (file)
@@ -1285,7 +1285,7 @@ flash_temp:
 int qlcnic_dump_fw(struct qlcnic_adapter *adapter)
 {
        struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
-       static const struct qlcnic_dump_operations *fw_dump_ops;
+       const struct qlcnic_dump_operations *fw_dump_ops;
        struct qlcnic_83xx_dump_template_hdr *hdr_83xx;
        u32 entry_offset, dump, no_entries, buf_offset = 0;
        int i, k, ops_cnt, ops_index, dump_size = 0;
index c905971..d3f96a8 100644 (file)
@@ -746,59 +746,171 @@ static const char *efx_mcdi_phy_test_name(struct efx_nic *efx,
        return NULL;
 }
 
-#define SFP_PAGE_SIZE  128
-#define SFP_NUM_PAGES  2
-static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx,
-                                         struct ethtool_eeprom *ee, u8 *data)
+#define SFP_PAGE_SIZE          128
+#define SFF_DIAG_TYPE_OFFSET   92
+#define SFF_DIAG_ADDR_CHANGE   BIT(2)
+#define SFF_8079_NUM_PAGES     2
+#define SFF_8472_NUM_PAGES     4
+#define SFF_8436_NUM_PAGES     5
+#define SFF_DMT_LEVEL_OFFSET   94
+
+/** efx_mcdi_phy_get_module_eeprom_page() - Get a single page of module eeprom
+ * @efx:       NIC context
+ * @page:      EEPROM page number
+ * @data:      Destination data pointer
+ * @offset:    Offset in page to copy from in to data
+ * @space:     Space available in data
+ *
+ * Return:
+ *   >=0 - amount of data copied
+ *   <0  - error
+ */
+static int efx_mcdi_phy_get_module_eeprom_page(struct efx_nic *efx,
+                                              unsigned int page,
+                                              u8 *data, ssize_t offset,
+                                              ssize_t space)
 {
        MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX);
        MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN);
        size_t outlen;
-       int rc;
        unsigned int payload_len;
-       unsigned int space_remaining = ee->len;
-       unsigned int page;
-       unsigned int page_off;
        unsigned int to_copy;
-       u8 *user_data = data;
+       int rc;
 
-       BUILD_BUG_ON(SFP_PAGE_SIZE * SFP_NUM_PAGES != ETH_MODULE_SFF_8079_LEN);
+       if (offset > SFP_PAGE_SIZE)
+               return -EINVAL;
 
-       page_off = ee->offset % SFP_PAGE_SIZE;
-       page = ee->offset / SFP_PAGE_SIZE;
+       to_copy = min(space, SFP_PAGE_SIZE - offset);
 
-       while (space_remaining && (page < SFP_NUM_PAGES)) {
-               MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page);
+       MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page);
+       rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_PHY_MEDIA_INFO,
+                               inbuf, sizeof(inbuf),
+                               outbuf, sizeof(outbuf),
+                               &outlen);
 
-               rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_MEDIA_INFO,
-                                 inbuf, sizeof(inbuf),
-                                 outbuf, sizeof(outbuf),
-                                 &outlen);
-               if (rc)
-                       return rc;
+       if (rc)
+               return rc;
+
+       if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST +
+                       SFP_PAGE_SIZE))
+               return -EIO;
+
+       payload_len = MCDI_DWORD(outbuf, GET_PHY_MEDIA_INFO_OUT_DATALEN);
+       if (payload_len != SFP_PAGE_SIZE)
+               return -EIO;
 
-               if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST +
-                             SFP_PAGE_SIZE))
-                       return -EIO;
+       memcpy(data, MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + offset,
+              to_copy);
 
-               payload_len = MCDI_DWORD(outbuf,
-                                        GET_PHY_MEDIA_INFO_OUT_DATALEN);
-               if (payload_len != SFP_PAGE_SIZE)
-                       return -EIO;
+       return to_copy;
+}
 
-               /* Copy as much as we can into data */
-               payload_len -= page_off;
-               to_copy = (space_remaining < payload_len) ?
-                       space_remaining : payload_len;
+static int efx_mcdi_phy_get_module_eeprom_byte(struct efx_nic *efx,
+                                              unsigned int page,
+                                              u8 byte)
+{
+       int rc;
+       u8 data;
 
-               memcpy(user_data,
-                      MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + page_off,
-                      to_copy);
+       rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, &data, byte, 1);
+       if (rc == 1)
+               return data;
+
+       return rc;
+}
+
+static int efx_mcdi_phy_diag_type(struct efx_nic *efx)
+{
+       /* Page zero of the EEPROM includes the diagnostic type at byte 92. */
+       return efx_mcdi_phy_get_module_eeprom_byte(efx, 0,
+                                                  SFF_DIAG_TYPE_OFFSET);
+}
 
-               space_remaining -= to_copy;
-               user_data += to_copy;
-               page_off = 0;
-               page++;
+static int efx_mcdi_phy_sff_8472_level(struct efx_nic *efx)
+{
+       /* Page zero of the EEPROM includes the DMT level at byte 94. */
+       return efx_mcdi_phy_get_module_eeprom_byte(efx, 0,
+                                                  SFF_DMT_LEVEL_OFFSET);
+}
+
+static u32 efx_mcdi_phy_module_type(struct efx_nic *efx)
+{
+       struct efx_mcdi_phy_data *phy_data = efx->phy_data;
+
+       if (phy_data->media != MC_CMD_MEDIA_QSFP_PLUS)
+               return phy_data->media;
+
+       /* A QSFP+ NIC may actually have an SFP+ module attached.
+        * The ID is page 0, byte 0.
+        */
+       switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) {
+       case 0x3:
+               return MC_CMD_MEDIA_SFP_PLUS;
+       case 0xc:
+       case 0xd:
+               return MC_CMD_MEDIA_QSFP_PLUS;
+       default:
+               return 0;
+       }
+}
+
+static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx,
+                                         struct ethtool_eeprom *ee, u8 *data)
+{
+       int rc;
+       ssize_t space_remaining = ee->len;
+       unsigned int page_off;
+       bool ignore_missing;
+       int num_pages;
+       int page;
+
+       switch (efx_mcdi_phy_module_type(efx)) {
+       case MC_CMD_MEDIA_SFP_PLUS:
+               num_pages = efx_mcdi_phy_sff_8472_level(efx) > 0 ?
+                               SFF_8472_NUM_PAGES : SFF_8079_NUM_PAGES;
+               page = 0;
+               ignore_missing = false;
+               break;
+       case MC_CMD_MEDIA_QSFP_PLUS:
+               num_pages = SFF_8436_NUM_PAGES;
+               page = -1; /* We obtain the lower page by asking for -1. */
+               ignore_missing = true; /* Ignore missing pages after page 0. */
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       page_off = ee->offset % SFP_PAGE_SIZE;
+       page += ee->offset / SFP_PAGE_SIZE;
+
+       while (space_remaining && (page < num_pages)) {
+               rc = efx_mcdi_phy_get_module_eeprom_page(efx, page,
+                                                        data, page_off,
+                                                        space_remaining);
+
+               if (rc > 0) {
+                       space_remaining -= rc;
+                       data += rc;
+                       page_off = 0;
+                       page++;
+               } else if (rc == 0) {
+                       space_remaining = 0;
+               } else if (ignore_missing && (page > 0)) {
+                       int intended_size = SFP_PAGE_SIZE - page_off;
+
+                       space_remaining -= intended_size;
+                       if (space_remaining < 0) {
+                               space_remaining = 0;
+                       } else {
+                               memset(data, 0, intended_size);
+                               data += intended_size;
+                               page_off = 0;
+                               page++;
+                               rc = 0;
+                       }
+               } else {
+                       return rc;
+               }
        }
 
        return 0;
@@ -807,16 +919,42 @@ static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx,
 static int efx_mcdi_phy_get_module_info(struct efx_nic *efx,
                                        struct ethtool_modinfo *modinfo)
 {
-       struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+       int sff_8472_level;
+       int diag_type;
 
-       switch (phy_cfg->media) {
+       switch (efx_mcdi_phy_module_type(efx)) {
        case MC_CMD_MEDIA_SFP_PLUS:
-               modinfo->type = ETH_MODULE_SFF_8079;
-               modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
-               return 0;
+               sff_8472_level = efx_mcdi_phy_sff_8472_level(efx);
+
+               /* If we can't read the diagnostics level we have none. */
+               if (sff_8472_level < 0)
+                       return -EOPNOTSUPP;
+
+               /* Check if this module requires the (unsupported) address
+                * change operation.
+                */
+               diag_type = efx_mcdi_phy_diag_type(efx);
+
+               if ((sff_8472_level == 0) ||
+                   (diag_type & SFF_DIAG_ADDR_CHANGE)) {
+                       modinfo->type = ETH_MODULE_SFF_8079;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+               } else {
+                       modinfo->type = ETH_MODULE_SFF_8472;
+                       modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+               }
+               break;
+
+       case MC_CMD_MEDIA_QSFP_PLUS:
+               modinfo->type = ETH_MODULE_SFF_8436;
+               modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+               break;
+
        default:
                return -EOPNOTSUPP;
        }
+
+       return 0;
 }
 
 static const struct efx_phy_operations efx_mcdi_phy_ops = {
index 8603e39..5b56c24 100644 (file)
@@ -248,7 +248,7 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[],
        dev->ethtool_ops = &vsw_ethtool_ops;
        dev->watchdog_timeo = VSW_TX_TIMEOUT;
 
-       dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG;
+       dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG;
        dev->features = dev->hw_features;
 
        /* MTU range: 68 - 65535 */
index 75b167e..0b95105 100644 (file)
@@ -312,7 +312,7 @@ static struct vnet *vnet_new(const u64 *local_mac,
        dev->watchdog_timeo = VNET_TX_TIMEOUT;
 
        dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE |
-                          NETIF_F_IP_CSUM | NETIF_F_SG;
+                          NETIF_F_HW_CSUM | NETIF_F_SG;
        dev->features = dev->hw_features;
 
        /* MTU range: 68 - 65535 */
index 9e86833..ecf456c 100644 (file)
@@ -303,7 +303,7 @@ static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
        return skb;
 }
 
-static inline void vnet_fullcsum(struct sk_buff *skb)
+static inline void vnet_fullcsum_ipv4(struct sk_buff *skb)
 {
        struct iphdr *iph = ip_hdr(skb);
        int offset = skb_transport_offset(skb);
@@ -335,6 +335,40 @@ static inline void vnet_fullcsum(struct sk_buff *skb)
        }
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static inline void vnet_fullcsum_ipv6(struct sk_buff *skb)
+{
+       struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       int offset = skb_transport_offset(skb);
+
+       if (skb->protocol != htons(ETH_P_IPV6))
+               return;
+       if (ip6h->nexthdr != IPPROTO_TCP &&
+           ip6h->nexthdr != IPPROTO_UDP)
+               return;
+       skb->ip_summed = CHECKSUM_NONE;
+       skb->csum_level = 1;
+       skb->csum = 0;
+       if (ip6h->nexthdr == IPPROTO_TCP) {
+               struct tcphdr *ptcp = tcp_hdr(skb);
+
+               ptcp->check = 0;
+               skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+               ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+                                             skb->len - offset, IPPROTO_TCP,
+                                             skb->csum);
+       } else if (ip6h->nexthdr == IPPROTO_UDP) {
+               struct udphdr *pudp = udp_hdr(skb);
+
+               pudp->check = 0;
+               skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+               pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+                                             skb->len - offset, IPPROTO_UDP,
+                                             skb->csum);
+       }
+}
+#endif
+
 static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
 {
        struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
@@ -394,9 +428,14 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
                                struct iphdr *iph = ip_hdr(skb);
                                int ihl = iph->ihl * 4;
 
-                               skb_reset_transport_header(skb);
                                skb_set_transport_header(skb, ihl);
-                               vnet_fullcsum(skb);
+                               vnet_fullcsum_ipv4(skb);
+#if IS_ENABLED(CONFIG_IPV6)
+                       } else if (skb->protocol == htons(ETH_P_IPV6)) {
+                               skb_set_transport_header(skb,
+                                                        sizeof(struct ipv6hdr));
+                               vnet_fullcsum_ipv6(skb);
+#endif
                        }
                }
                if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
@@ -1115,24 +1154,47 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        start = skb_checksum_start_offset(skb);
                if (start) {
-                       struct iphdr *iph = ip_hdr(nskb);
                        int offset = start + nskb->csum_offset;
 
+                       /* copy the headers, no csum here */
                        if (skb_copy_bits(skb, 0, nskb->data, start)) {
                                dev_kfree_skb(nskb);
                                dev_kfree_skb(skb);
                                return NULL;
                        }
+
+                       /* copy the rest, with csum calculation */
                        *(__sum16 *)(skb->data + offset) = 0;
                        csum = skb_copy_and_csum_bits(skb, start,
                                                      nskb->data + start,
                                                      skb->len - start, 0);
-                       if (iph->protocol == IPPROTO_TCP ||
-                           iph->protocol == IPPROTO_UDP) {
-                               csum = csum_tcpudp_magic(iph->saddr, iph->daddr,
-                                                        skb->len - start,
-                                                        iph->protocol, csum);
+
+                       /* add in the header checksums */
+                       if (skb->protocol == htons(ETH_P_IP)) {
+                               struct iphdr *iph = ip_hdr(nskb);
+
+                               if (iph->protocol == IPPROTO_TCP ||
+                                   iph->protocol == IPPROTO_UDP) {
+                                       csum = csum_tcpudp_magic(iph->saddr,
+                                                                iph->daddr,
+                                                                skb->len - start,
+                                                                iph->protocol,
+                                                                csum);
+                               }
+                       } else if (skb->protocol == htons(ETH_P_IPV6)) {
+                               struct ipv6hdr *ip6h = ipv6_hdr(nskb);
+
+                               if (ip6h->nexthdr == IPPROTO_TCP ||
+                                   ip6h->nexthdr == IPPROTO_UDP) {
+                                       csum = csum_ipv6_magic(&ip6h->saddr,
+                                                              &ip6h->daddr,
+                                                              skb->len - start,
+                                                              ip6h->nexthdr,
+                                                              csum);
+                               }
                        }
+
+                       /* save the final result */
                        *(__sum16 *)(nskb->data + offset) = csum;
 
                        nskb->ip_summed = CHECKSUM_NONE;
@@ -1318,8 +1380,14 @@ int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
        if (unlikely(!skb))
                goto out_dropped;
 
-       if (skb->ip_summed == CHECKSUM_PARTIAL)
-               vnet_fullcsum(skb);
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               if (skb->protocol == htons(ETH_P_IP))
+                       vnet_fullcsum_ipv4(skb);
+#if IS_ENABLED(CONFIG_IPV6)
+               else if (skb->protocol == htons(ETH_P_IPV6))
+                       vnet_fullcsum_ipv6(skb);
+#endif
+       }
 
        dr = &port->vio.drings[VIO_DRIVER_TX_RING];
        i = skb_get_queue_mapping(skb);
index 3b91257..e1b55b8 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/netdevice.h>
 #include <linux/tcp.h>
+#include <linux/interrupt.h>
 
 #include "dwc-xlgmac.h"
 #include "dwc-xlgmac-reg.h"
index af27f7d..5ef6263 100644 (file)
@@ -389,7 +389,7 @@ struct axidma_bd {
  * @dma_err_tasklet: Tasklet structure to process Axi DMA errors
  * @tx_irq:    Axidma TX IRQ number
  * @rx_irq:    Axidma RX IRQ number
- * @phy_type:  Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X
+ * @phy_mode:  Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X
  * @options:   AxiEthernet option word
  * @last_link: Phy link state in which the PHY was negotiated earlier
  * @features:  Stores the extended features supported by the axienet hw
@@ -432,7 +432,7 @@ struct axienet_local {
 
        int tx_irq;
        int rx_irq;
-       u32 phy_type;
+       phy_interface_t phy_mode;
 
        u32 options;                    /* Current options word */
        u32 last_link;
index 33c595f..e74e1e8 100644 (file)
@@ -531,11 +531,11 @@ static void axienet_adjust_link(struct net_device *ndev)
        link_state = phy->speed | (phy->duplex << 1) | phy->link;
        if (lp->last_link != link_state) {
                if ((phy->speed == SPEED_10) || (phy->speed == SPEED_100)) {
-                       if (lp->phy_type == XAE_PHY_TYPE_1000BASE_X)
+                       if (lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX)
                                setspeed = 0;
                } else {
                        if ((phy->speed == SPEED_1000) &&
-                           (lp->phy_type == XAE_PHY_TYPE_MII))
+                           (lp->phy_mode == PHY_INTERFACE_MODE_MII))
                                setspeed = 0;
                }
 
@@ -935,15 +935,8 @@ static int axienet_open(struct net_device *ndev)
                return ret;
 
        if (lp->phy_node) {
-               if (lp->phy_type == XAE_PHY_TYPE_GMII) {
-                       phydev = of_phy_connect(lp->ndev, lp->phy_node,
-                                               axienet_adjust_link, 0,
-                                               PHY_INTERFACE_MODE_GMII);
-               } else if (lp->phy_type == XAE_PHY_TYPE_RGMII_2_0) {
-                       phydev = of_phy_connect(lp->ndev, lp->phy_node,
-                                               axienet_adjust_link, 0,
-                                               PHY_INTERFACE_MODE_RGMII_ID);
-               }
+               phydev = of_phy_connect(lp->ndev, lp->phy_node,
+                                       axienet_adjust_link, 0, lp->phy_mode);
 
                if (!phydev)
                        dev_err(lp->dev, "of_phy_connect() failed\n");
@@ -1539,7 +1532,38 @@ static int axienet_probe(struct platform_device *pdev)
         * the device-tree and accordingly set flags.
         */
        of_property_read_u32(pdev->dev.of_node, "xlnx,rxmem", &lp->rxmem);
-       of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &lp->phy_type);
+
+       /* Start with the proprietary, and broken phy_type */
+       ret = of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &value);
+       if (!ret) {
+               netdev_warn(ndev, "Please upgrade your device tree binary blob to use phy-mode");
+               switch (value) {
+               case XAE_PHY_TYPE_MII:
+                       lp->phy_mode = PHY_INTERFACE_MODE_MII;
+                       break;
+               case XAE_PHY_TYPE_GMII:
+                       lp->phy_mode = PHY_INTERFACE_MODE_GMII;
+                       break;
+               case XAE_PHY_TYPE_RGMII_2_0:
+                       lp->phy_mode = PHY_INTERFACE_MODE_RGMII_ID;
+                       break;
+               case XAE_PHY_TYPE_SGMII:
+                       lp->phy_mode = PHY_INTERFACE_MODE_SGMII;
+                       break;
+               case XAE_PHY_TYPE_1000BASE_X:
+                       lp->phy_mode = PHY_INTERFACE_MODE_1000BASEX;
+                       break;
+               default:
+                       ret = -EINVAL;
+                       goto free_netdev;
+               }
+       } else {
+               lp->phy_mode = of_get_phy_mode(pdev->dev.of_node);
+               if (lp->phy_mode < 0) {
+                       ret = -EINVAL;
+                       goto free_netdev;
+               }
+       }
 
        /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
        np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0);
index d6c2558..afb65f7 100644 (file)
@@ -183,10 +183,12 @@ struct rndis_device {
 /* Interface */
 struct rndis_message;
 struct netvsc_device;
-int netvsc_device_add(struct hv_device *device,
-                     const struct netvsc_device_info *info);
+struct net_device_context;
+
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+                                       const struct netvsc_device_info *info);
 void netvsc_device_remove(struct hv_device *device);
-int netvsc_send(struct hv_device *device,
+int netvsc_send(struct net_device_context *ndc,
                struct hv_netvsc_packet *packet,
                struct rndis_message *rndis_msg,
                struct hv_page_buffer **page_buffer,
@@ -200,10 +202,11 @@ int netvsc_recv_callback(struct net_device *net,
                         const struct ndis_pkt_8021q_info *vlan);
 void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);
+bool rndis_filter_opened(const struct netvsc_device *nvdev);
 int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
-int rndis_filter_device_add(struct hv_device *dev,
-                           struct netvsc_device_info *info);
+struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+                                             struct netvsc_device_info *info);
 void rndis_filter_update(struct netvsc_device *nvdev);
 void rndis_filter_device_remove(struct hv_device *dev,
                                struct netvsc_device *nvdev);
@@ -724,6 +727,7 @@ struct net_device_context {
 /* Per channel data */
 struct netvsc_channel {
        struct vmbus_channel *channel;
+       struct netvsc_device *net_device;
        const struct vmpacket_descriptor *desc;
        struct napi_struct napi;
        struct multi_send_data msd;
@@ -783,18 +787,6 @@ struct netvsc_device {
        struct rcu_head rcu;
 };
 
-static inline struct netvsc_device *
-net_device_to_netvsc_device(struct net_device *ndev)
-{
-       return ((struct net_device_context *)netdev_priv(ndev))->nvdev;
-}
-
-static inline struct netvsc_device *
-hv_device_to_netvsc_device(struct hv_device *device)
-{
-       return net_device_to_netvsc_device(hv_get_drvdata(device));
-}
-
 /* NdisInitialize message */
 struct rndis_initialize_request {
        u32 req_id;
index 0a9167d..0a9d9fe 100644 (file)
@@ -29,6 +29,8 @@
 #include <linux/netdevice.h>
 #include <linux/if_ether.h>
 #include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+
 #include <asm/sync_bitops.h>
 
 #include "hyperv_net.h"
@@ -41,7 +43,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 {
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct hv_device *dev = net_device_ctx->device_ctx;
-       struct netvsc_device *nv_dev = net_device_ctx->nvdev;
+       struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
        struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
 
        memset(init_pkt, 0, sizeof(struct nvsp_message));
@@ -103,7 +105,8 @@ static void netvsc_destroy_buf(struct hv_device *device)
 {
        struct nvsp_message *revoke_packet;
        struct net_device *ndev = hv_get_drvdata(device);
-       struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+       struct net_device_context *ndc = netdev_priv(ndev);
+       struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
        int ret;
 
        /*
@@ -549,7 +552,8 @@ void netvsc_device_remove(struct hv_device *device)
 {
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
-       struct netvsc_device *net_device = net_device_ctx->nvdev;
+       struct netvsc_device *net_device
+               = rtnl_dereference(net_device_ctx->nvdev);
        int i;
 
        netvsc_disconnect_vsp(device);
@@ -819,13 +823,16 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
        msdp->count = 0;
 }
 
-int netvsc_send(struct hv_device *device,
+/* RCU already held by caller */
+int netvsc_send(struct net_device_context *ndev_ctx,
                struct hv_netvsc_packet *packet,
                struct rndis_message *rndis_msg,
                struct hv_page_buffer **pb,
                struct sk_buff *skb)
 {
-       struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
+       struct netvsc_device *net_device
+               = rcu_dereference_rtnl(ndev_ctx->nvdev);
+       struct hv_device *device = ndev_ctx->device_ctx;
        int ret = 0;
        struct netvsc_channel *nvchan;
        u32 pktlen = packet->total_data_buflen, msd_len = 0;
@@ -837,7 +844,7 @@ int netvsc_send(struct hv_device *device,
        bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
 
        /* If device is rescinded, return error and packet will get dropped. */
-       if (unlikely(net_device->destroy))
+       if (unlikely(!net_device || net_device->destroy))
                return -ENODEV;
 
        /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
@@ -1219,11 +1226,11 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 {
        struct netvsc_channel *nvchan
                = container_of(napi, struct netvsc_channel, napi);
+       struct netvsc_device *net_device = nvchan->net_device;
        struct vmbus_channel *channel = nvchan->channel;
        struct hv_device *device = netvsc_channel_to_device(channel);
        u16 q_idx = channel->offermsg.offer.sub_channel_index;
        struct net_device *ndev = hv_get_drvdata(device);
-       struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
        int work_done = 0;
 
        /* If starting a new interval */
@@ -1271,8 +1278,8 @@ void netvsc_channel_cb(void *context)
  * netvsc_device_add - Callback when the device belonging to this
  * driver is added
  */
-int netvsc_device_add(struct hv_device *device,
-                     const struct netvsc_device_info *device_info)
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+                               const struct netvsc_device_info *device_info)
 {
        int i, ret = 0;
        int ring_size = device_info->ring_size;
@@ -1282,7 +1289,7 @@ int netvsc_device_add(struct hv_device *device,
 
        net_device = alloc_net_device();
        if (!net_device)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        net_device->ring_size = ring_size;
 
@@ -1302,6 +1309,7 @@ int netvsc_device_add(struct hv_device *device,
                struct netvsc_channel *nvchan = &net_device->chan_table[i];
 
                nvchan->channel = device->channel;
+               nvchan->net_device = net_device;
        }
 
        /* Enable NAPI handler before init callbacks */
@@ -1338,7 +1346,7 @@ int netvsc_device_add(struct hv_device *device,
                goto close;
        }
 
-       return ret;
+       return net_device;
 
 close:
        netif_napi_del(&net_device->chan_table[0].napi);
@@ -1349,6 +1357,5 @@ close:
 cleanup:
        free_netvsc_device(&net_device->rcu);
 
-       return ret;
-
+       return ERR_PTR(ret);
 }
index 63c98bb..a164981 100644 (file)
@@ -69,7 +69,7 @@ static void netvsc_set_multicast_list(struct net_device *net)
 static int netvsc_open(struct net_device *net)
 {
        struct net_device_context *ndev_ctx = netdev_priv(net);
-       struct netvsc_device *nvdev = ndev_ctx->nvdev;
+       struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev);
        struct rndis_device *rdev;
        int ret = 0;
 
@@ -505,8 +505,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 
        /* timestamp packet in software */
        skb_tx_timestamp(skb);
-       ret = netvsc_send(net_device_ctx->device_ctx, packet,
-                         rndis_msg, &pb, skb);
+
+       ret = netvsc_send(net_device_ctx, packet, rndis_msg, &pb, skb);
        if (likely(ret == 0))
                return NETDEV_TX_OK;
 
@@ -717,6 +717,7 @@ static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
                             u32 num_chn)
 {
        struct netvsc_device_info device_info;
+       struct netvsc_device *net_device;
        int ret;
 
        memset(&device_info, 0, sizeof(device_info));
@@ -724,17 +725,16 @@ static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
        device_info.ring_size = ring_size;
        device_info.max_num_vrss_chns = num_chn;
 
-       ret = rndis_filter_device_add(dev, &device_info);
-       if (ret)
-               return ret;
-
        ret = netif_set_real_num_tx_queues(net, num_chn);
        if (ret)
                return ret;
 
        ret = netif_set_real_num_rx_queues(net, num_chn);
+       if (ret)
+               return ret;
 
-       return ret;
+       net_device = rndis_filter_device_add(dev, &device_info);
+       return IS_ERR(net_device) ? PTR_ERR(net_device) : 0;
 }
 
 static int netvsc_set_channels(struct net_device *net,
@@ -744,7 +744,7 @@ static int netvsc_set_channels(struct net_device *net,
        struct hv_device *dev = net_device_ctx->device_ctx;
        struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
        unsigned int count = channels->combined_count;
-       bool was_running;
+       bool was_opened;
        int ret;
 
        /* We do not support separate count for rx, tx, or other */
@@ -764,12 +764,9 @@ static int netvsc_set_channels(struct net_device *net,
        if (count > nvdev->max_chn)
                return -EINVAL;
 
-       was_running = netif_running(net);
-       if (was_running) {
-               ret = netvsc_close(net);
-               if (ret)
-                       return ret;
-       }
+       was_opened = rndis_filter_opened(nvdev);
+       if (was_opened)
+               rndis_filter_close(nvdev);
 
        rndis_filter_device_remove(dev, nvdev);
 
@@ -779,10 +776,12 @@ static int netvsc_set_channels(struct net_device *net,
        else
                netvsc_set_queues(net, dev, nvdev->num_chn);
 
-       if (was_running)
-               ret = netvsc_open(net);
+       nvdev = rtnl_dereference(net_device_ctx->nvdev);
+       if (was_opened)
+               rndis_filter_open(nvdev);
 
        /* We may have missed link change notifications */
+       net_device_ctx->last_reconfig = 0;
        schedule_delayed_work(&net_device_ctx->dwork, 0);
 
        return ret;
@@ -848,19 +847,18 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
        struct net_device_context *ndevctx = netdev_priv(ndev);
        struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
        struct hv_device *hdev = ndevctx->device_ctx;
+       int orig_mtu = ndev->mtu;
        struct netvsc_device_info device_info;
-       bool was_running;
+       bool was_opened;
        int ret = 0;
 
        if (!nvdev || nvdev->destroy)
                return -ENODEV;
 
-       was_running = netif_running(ndev);
-       if (was_running) {
-               ret = netvsc_close(ndev);
-               if (ret)
-                       return ret;
-       }
+       netif_device_detach(ndev);
+       was_opened = rndis_filter_opened(nvdev);
+       if (was_opened)
+               rndis_filter_close(nvdev);
 
        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
@@ -869,18 +867,21 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 
        rndis_filter_device_remove(hdev, nvdev);
 
-       /* 'nvdev' has been freed in rndis_filter_device_remove() ->
-        * netvsc_device_remove () -> free_netvsc_device().
-        * We mustn't access it before it's re-created in
-        * rndis_filter_device_add() -> netvsc_device_add().
-        */
-
        ndev->mtu = mtu;
 
-       rndis_filter_device_add(hdev, &device_info);
+       nvdev = rndis_filter_device_add(hdev, &device_info);
+       if (IS_ERR(nvdev)) {
+               ret = PTR_ERR(nvdev);
+
+               /* Attempt rollback to original MTU */
+               ndev->mtu = orig_mtu;
+               rndis_filter_device_add(hdev, &device_info);
+       }
+
+       if (was_opened)
+               rndis_filter_open(nvdev);
 
-       if (was_running)
-               ret = netvsc_open(ndev);
+       netif_device_attach(ndev);
 
        /* We may have missed link change notifications */
        schedule_delayed_work(&ndevctx->dwork, 0);
@@ -1363,7 +1364,7 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
                        continue;       /* not a netvsc device */
 
                net_device_ctx = netdev_priv(dev);
-               if (net_device_ctx->nvdev == NULL)
+               if (!rtnl_dereference(net_device_ctx->nvdev))
                        continue;       /* device is removed */
 
                if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
@@ -1528,8 +1529,10 @@ static int netvsc_probe(struct hv_device *dev,
        memset(&device_info, 0, sizeof(device_info));
        device_info.ring_size = ring_size;
        device_info.num_chn = VRSS_CHANNEL_DEFAULT;
-       ret = rndis_filter_device_add(dev, &device_info);
-       if (ret != 0) {
+
+       nvdev = rndis_filter_device_add(dev, &device_info);
+       if (IS_ERR(nvdev)) {
+               ret = PTR_ERR(nvdev);
                netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
                free_netdev(net);
                hv_set_drvdata(dev, NULL);
@@ -1543,10 +1546,11 @@ static int netvsc_probe(struct hv_device *dev,
                NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
        net->vlan_features = net->features;
 
-       /* RCU not necessary here, device not registered */
-       nvdev = net_device_ctx->nvdev;
        netif_set_real_num_tx_queues(net, nvdev->num_chn);
        netif_set_real_num_rx_queues(net, nvdev->num_chn);
+       rtnl_unlock();
+
+       netdev_lockdep_set_classes(net);
 
        /* MTU range: 68 - 1500 or 65521 */
        net->min_mtu = NETVSC_MTU_MIN;
@@ -1588,7 +1592,8 @@ static int netvsc_remove(struct hv_device *dev)
         * removed. Also blocks mtu and channel changes.
         */
        rtnl_lock();
-       rndis_filter_device_remove(dev, ndev_ctx->nvdev);
+       rndis_filter_device_remove(dev,
+                                  rtnl_dereference(ndev_ctx->nvdev));
        rtnl_unlock();
 
        unregister_netdev(net);
index 85c00e1..e439886 100644 (file)
@@ -84,6 +84,14 @@ static struct rndis_device *get_rndis_device(void)
        return device;
 }
 
+static struct netvsc_device *
+net_device_to_netvsc_device(struct net_device *ndev)
+{
+       struct net_device_context *net_device_ctx = netdev_priv(ndev);
+
+       return rtnl_dereference(net_device_ctx->nvdev);
+}
+
 static struct rndis_request *get_rndis_request(struct rndis_device *dev,
                                             u32 msg_type,
                                             u32 msg_len)
@@ -243,7 +251,7 @@ static int rndis_filter_send_request(struct rndis_device *dev,
                        pb[0].len;
        }
 
-       ret = netvsc_send(net_device_ctx->device_ctx, packet, NULL, &pb, NULL);
+       ret = netvsc_send(net_device_ctx, packet, NULL, &pb, NULL);
        return ret;
 }
 
@@ -472,7 +480,7 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
 
        if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) {
                struct net_device_context *ndevctx = netdev_priv(dev->ndev);
-               struct netvsc_device *nvdev = ndevctx->nvdev;
+               struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
                struct ndis_offload *hwcaps;
                u32 nvsp_version = nvdev->nvsp_version;
                u8 ndis_rev;
@@ -658,9 +666,9 @@ cleanup:
 
 static int
 rndis_filter_set_offload_params(struct net_device *ndev,
+                               struct netvsc_device *nvdev,
                                struct ndis_offload_params *req_offloads)
 {
-       struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
        struct rndis_device *rdev = nvdev->extension;
        struct rndis_request *request;
        struct rndis_set_request *set;
@@ -944,7 +952,7 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
        struct rndis_request *request;
        struct rndis_halt_request *halt;
        struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
-       struct netvsc_device *nvdev = net_device_ctx->nvdev;
+       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
        /* Attempt to do a rndis device halt */
        request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -1052,8 +1060,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
                complete(&nvscdev->channel_init_wait);
 }
 
-int rndis_filter_device_add(struct hv_device *dev,
-                           struct netvsc_device_info *device_info)
+struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+                                     struct netvsc_device_info *device_info)
 {
        struct net_device *net = hv_get_drvdata(dev);
        struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -1072,21 +1080,20 @@ int rndis_filter_device_add(struct hv_device *dev,
 
        rndis_device = get_rndis_device();
        if (!rndis_device)
-               return -ENODEV;
+               return ERR_PTR(-ENODEV);
 
        /*
         * Let the inner driver handle this first to create the netvsc channel
         * NOTE! Once the channel is created, we may get a receive callback
         * (RndisFilterOnReceive()) before this call is completed
         */
-       ret = netvsc_device_add(dev, device_info);
-       if (ret != 0) {
+       net_device = netvsc_device_add(dev, device_info);
+       if (IS_ERR(net_device)) {
                kfree(rndis_device);
-               return ret;
+               return net_device;
        }
 
        /* Initialize the rndis device */
-       net_device = net_device_ctx->nvdev;
        net_device->max_chn = 1;
        net_device->num_chn = 1;
 
@@ -1097,10 +1104,8 @@ int rndis_filter_device_add(struct hv_device *dev,
 
        /* Send the rndis initialization message */
        ret = rndis_filter_init_device(rndis_device);
-       if (ret != 0) {
-               rndis_filter_device_remove(dev, net_device);
-               return ret;
-       }
+       if (ret != 0)
+               goto err_dev_remv;
 
        /* Get the MTU from the host */
        size = sizeof(u32);
@@ -1112,19 +1117,15 @@ int rndis_filter_device_add(struct hv_device *dev,
 
        /* Get the mac address */
        ret = rndis_filter_query_device_mac(rndis_device);
-       if (ret != 0) {
-               rndis_filter_device_remove(dev, net_device);
-               return ret;
-       }
+       if (ret != 0)
+               goto err_dev_remv;
 
        memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
 
        /* Find HW offload capabilities */
        ret = rndis_query_hwcaps(rndis_device, &hwcaps);
-       if (ret != 0) {
-               rndis_filter_device_remove(dev, net_device);
-               return ret;
-       }
+       if (ret != 0)
+               goto err_dev_remv;
 
        /* A value of zero means "no change"; now turn on what we want. */
        memset(&offloads, 0, sizeof(struct ndis_offload_params));
@@ -1179,7 +1180,7 @@ int rndis_filter_device_add(struct hv_device *dev,
 
        netif_set_gso_max_size(net, gso_max_size);
 
-       ret = rndis_filter_set_offload_params(net, &offloads);
+       ret = rndis_filter_set_offload_params(net, net_device, &offloads);
        if (ret)
                goto err_dev_remv;
 
@@ -1190,7 +1191,7 @@ int rndis_filter_device_add(struct hv_device *dev,
                   rndis_device->link_state ? "down" : "up");
 
        if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
-               return 0;
+               return net_device;
 
        rndis_filter_query_link_speed(rndis_device);
 
@@ -1223,7 +1224,7 @@ int rndis_filter_device_add(struct hv_device *dev,
 
        num_rss_qs = net_device->num_chn - 1;
        if (num_rss_qs == 0)
-               return 0;
+               return net_device;
 
        refcount_set(&net_device->sc_offered, num_rss_qs);
        vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
@@ -1260,11 +1261,11 @@ out:
                net_device->num_chn = 1;
        }
 
-       return 0; /* return 0 because primary channel can be used alone */
+       return net_device;
 
 err_dev_remv:
        rndis_filter_device_remove(dev, net_device);
-       return ret;
+       return ERR_PTR(ret);
 }
 
 void rndis_filter_device_remove(struct hv_device *dev,
@@ -1302,3 +1303,8 @@ int rndis_filter_close(struct netvsc_device *nvdev)
 
        return rndis_filter_close_device(nvdev->extension);
 }
+
+bool rndis_filter_opened(const struct netvsc_device *nvdev)
+{
+       return atomic_read(&nvdev->open_cnt) > 0;
+}
index a626c53..326243f 100644 (file)
@@ -66,6 +66,7 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
+#include <linux/interrupt.h>
 
 #include <net/ieee802154_netdev.h>
 #include <net/mac802154.h>
index f37e3c1..fdde207 100644 (file)
@@ -169,7 +169,7 @@ static void ipvlan_port_destroy(struct net_device *dev)
 
 #define IPVLAN_FEATURES \
        (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
-        NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \
+        NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \
         NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
         NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
 
index 22f133e..5dea206 100644 (file)
@@ -24,7 +24,7 @@
 #include <linux/virtio_net.h>
 
 #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
-                     NETIF_F_TSO6 | NETIF_F_UFO)
+                     NETIF_F_TSO6)
 
 static dev_t ipvtap_major;
 static struct cdev ipvtap_cdev;
index 0f581ee..ca35c6b 100644 (file)
@@ -841,7 +841,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
 
 #define MACVLAN_FEATURES \
        (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
-        NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \
+        NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \
         NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
         NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
 
index 91e7b19..c2d0ea2 100644 (file)
@@ -49,7 +49,7 @@ static struct class macvtap_class = {
 static struct cdev macvtap_cdev;
 
 #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
-                     NETIF_F_TSO6 | NETIF_F_UFO)
+                     NETIF_F_TSO6)
 
 static void macvtap_count_tx_dropped(struct tap_dev *tap)
 {
index 2df7b62..b6f9fa6 100644 (file)
@@ -399,8 +399,7 @@ error:
        }
 
        /* Put PHYs in RESET to save power */
-       if (bus->reset_gpiod)
-               gpiod_set_value_cansleep(bus->reset_gpiod, 1);
+       gpiod_set_value_cansleep(bus->reset_gpiod, 1);
 
        device_del(&bus->dev);
        return err;
@@ -425,8 +424,7 @@ void mdiobus_unregister(struct mii_bus *bus)
        }
 
        /* Put PHYs in RESET to save power */
-       if (bus->reset_gpiod)
-               gpiod_set_value_cansleep(bus->reset_gpiod, 1);
+       gpiod_set_value_cansleep(bus->reset_gpiod, 1);
 
        device_del(&bus->dev);
 }
index 3570c75..ca267fd 100644 (file)
@@ -943,9 +943,6 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
                        if (arg & TUN_F_TSO6)
                                feature_mask |= NETIF_F_TSO6;
                }
-
-               if (arg & TUN_F_UFO)
-                       feature_mask |= NETIF_F_UFO;
        }
 
        /* tun/tap driver inverts the usage for TSO offloads, where
@@ -956,7 +953,7 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
         * When user space turns off TSO, we turn off GSO/LRO so that
         * user-space will not receive TSO frames.
         */
-       if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO))
+       if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6))
                features |= RX_OFFLOADS;
        else
                features &= ~RX_OFFLOADS;
@@ -1078,7 +1075,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
        case TUNSETOFFLOAD:
                /* let the user check for future flags */
                if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
-                           TUN_F_TSO_ECN | TUN_F_UFO))
+                           TUN_F_TSO_ECN))
                        return -EINVAL;
 
                rtnl_lock();
index 3d4c245..a93392d 100644 (file)
@@ -199,7 +199,7 @@ struct tun_struct {
        struct net_device       *dev;
        netdev_features_t       set_features;
 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
-                         NETIF_F_TSO6|NETIF_F_UFO)
+                         NETIF_F_TSO6)
 
        int                     align;
        int                     vnet_hdr_sz;
@@ -1921,11 +1921,6 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
                                features |= NETIF_F_TSO6;
                        arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
                }
-
-               if (arg & TUN_F_UFO) {
-                       features |= NETIF_F_UFO;
-                       arg &= ~TUN_F_UFO;
-               }
        }
 
        /* This gives the user a way to test for new features in future by
index 8f572b9..811b182 100644 (file)
@@ -367,7 +367,7 @@ static struct attribute *cdc_ncm_sysfs_attrs[] = {
        NULL,
 };
 
-static struct attribute_group cdc_ncm_sysfs_attr_group = {
+static const struct attribute_group cdc_ncm_sysfs_attr_group = {
        .name = "cdc_ncm",
        .attrs = cdc_ncm_sysfs_attrs,
 };
index 99a26a9..9983016 100644 (file)
@@ -2429,7 +2429,7 @@ static int virtnet_probe(struct virtio_device *vdev)
                        dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
 
                if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
-                       dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
+                       dev->hw_features |= NETIF_F_TSO
                                | NETIF_F_TSO_ECN | NETIF_F_TSO6;
                }
                /* Individual feature bits: what can host handle? */
@@ -2439,13 +2439,11 @@ static int virtnet_probe(struct virtio_device *vdev)
                        dev->hw_features |= NETIF_F_TSO6;
                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
                        dev->hw_features |= NETIF_F_TSO_ECN;
-               if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
-                       dev->hw_features |= NETIF_F_UFO;
 
                dev->features |= NETIF_F_GSO_ROBUST;
 
                if (gso)
-                       dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
+                       dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
                /* (!csum && gso) case will be fixed by register_netdev() */
        }
        if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
index 84143a0..54201c0 100644 (file)
@@ -7837,7 +7837,7 @@ static int writerids(struct net_device *dev, aironet_ioctl *comp) {
        struct airo_info *ai = dev->ml_priv;
        int  ridcode;
         int  enabled;
-       static int (* writer)(struct airo_info *, u16 rid, const void *, int, int);
+       int (*writer)(struct airo_info *, u16 rid, const void *, int, int);
        unsigned char *iobuf;
 
        /* Only super-user can write RIDs */
index aaaca4d..ccbe745 100644 (file)
@@ -4324,7 +4324,7 @@ static struct attribute *ipw2100_sysfs_entries[] = {
        NULL,
 };
 
-static struct attribute_group ipw2100_attribute_group = {
+static const struct attribute_group ipw2100_attribute_group = {
        .attrs = ipw2100_sysfs_entries,
 };
 
index 9368abd..c311b1a 100644 (file)
@@ -11500,7 +11500,7 @@ static struct attribute *ipw_sysfs_entries[] = {
        NULL
 };
 
-static struct attribute_group ipw_attribute_group = {
+static const struct attribute_group ipw_attribute_group = {
        .name = NULL,           /* put in device directory */
        .attrs = ipw_sysfs_entries,
 };
index 38bf403..329f3a6 100644 (file)
@@ -3464,7 +3464,7 @@ static struct attribute *il3945_sysfs_entries[] = {
        NULL
 };
 
-static struct attribute_group il3945_attribute_group = {
+static const struct attribute_group il3945_attribute_group = {
        .name = NULL,           /* put in device directory */
        .attrs = il3945_sysfs_entries,
 };
index 5b51fba..de9b652 100644 (file)
@@ -4654,7 +4654,7 @@ static struct attribute *il_sysfs_entries[] = {
        NULL
 };
 
-static struct attribute_group il_attribute_group = {
+static const struct attribute_group il_attribute_group = {
        .name = NULL,           /* put in device directory */
        .attrs = il_sysfs_entries,
 };
index 55f238a..c58393e 100644 (file)
@@ -478,7 +478,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index)
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        u16 read_point = 0, write_point = 0, remind_cnt = 0;
        u32 tmp_4byte = 0;
-       static u16 last_read_point;
        static bool start_rx;
 
        tmp_4byte = rtl_read_dword(rtlpriv, REG_RXQ_TXBD_IDX);
@@ -506,7 +505,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index)
 
        rtlpci->rx_ring[queue_index].next_rx_rp = write_point;
 
-       last_read_point = read_point;
        return remind_cnt;
 }
 
@@ -917,7 +915,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        u16 cur_tx_rp = 0;
        u16 cur_tx_wp = 0;
-       static u16 last_txw_point;
        static bool over_run;
        u32 tmp = 0;
        u8 q_idx = *val;
@@ -951,9 +948,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
                                rtl_write_word(rtlpriv,
                                               get_desc_addr_fr_q_idx(q_idx),
                                               ring->cur_tx_wp);
-
-                               if (q_idx == 1)
-                                       last_txw_point = cur_tx_wp;
                        }
 
                        if (ring->avl_desc < (max_tx_desc - 15)) {
index b69e7a5..6353c74 100644 (file)
@@ -318,6 +318,12 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
 
 /* verify correctness of eBPF program */
 int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
+
+/* Map specifics */
+struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
+void __dev_map_flush(struct bpf_map *map);
+
 #else
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -356,6 +362,20 @@ static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
 static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
 {
 }
+
+static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
+                                                      u32 key)
+{
+       return NULL;
+}
+
+static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index)
+{
+}
+
+static inline void __dev_map_flush(struct bpf_map *map)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 /* verifier prototypes for helper functions called from eBPF programs */
index 3d137c3..b1e1035 100644 (file)
@@ -35,3 +35,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
+#ifdef CONFIG_NET
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+#endif
index bfef1e5..d19ed3c 100644 (file)
@@ -711,7 +711,21 @@ bool bpf_helper_changes_pkt_data(void *func);
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
                                       const struct bpf_insn *patch, u32 len);
+
+/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
+ * same cpu context. Further for best results no more than a single map
+ * for the do_redirect/do_flush pair should be used. This limitation is
+ * because we only track one map and force a flush when the map changes.
+ * This does not appear to be a real limitation for existing software.
+ */
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb);
+int xdp_do_redirect(struct net_device *dev,
+                   struct xdp_buff *xdp,
+                   struct bpf_prog *prog);
+void xdp_do_flush_map(void);
+
 void bpf_warn_invalid_xdp_action(u32 act);
+void bpf_warn_invalid_xdp_redirect(u32 ifindex);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
index 1d4737c..ebd2736 100644 (file)
@@ -36,7 +36,6 @@ enum {
        /**/NETIF_F_GSO_SHIFT,          /* keep the order of SKB_GSO_* bits */
        NETIF_F_TSO_BIT                 /* ... TCPv4 segmentation */
                = NETIF_F_GSO_SHIFT,
-       NETIF_F_UFO_BIT,                /* ... UDPv4 fragmentation */
        NETIF_F_GSO_ROBUST_BIT,         /* ... ->SKB_GSO_DODGY */
        NETIF_F_TSO_ECN_BIT,            /* ... TCP ECN support */
        NETIF_F_TSO_MANGLEID_BIT,       /* ... IPV4 ID mangling allowed */
@@ -118,7 +117,6 @@ enum {
 #define NETIF_F_TSO6           __NETIF_F(TSO6)
 #define NETIF_F_TSO_ECN                __NETIF_F(TSO_ECN)
 #define NETIF_F_TSO            __NETIF_F(TSO)
-#define NETIF_F_UFO            __NETIF_F(UFO)
 #define NETIF_F_VLAN_CHALLENGED        __NETIF_F(VLAN_CHALLENGED)
 #define NETIF_F_RXFCS          __NETIF_F(RXFCS)
 #define NETIF_F_RXALL          __NETIF_F(RXALL)
@@ -172,7 +170,7 @@ enum {
                                 NETIF_F_FSO)
 
 /* List of features with software fallbacks. */
-#define NETIF_F_GSO_SOFTWARE   (NETIF_F_ALL_TSO | NETIF_F_UFO | \
+#define NETIF_F_GSO_SOFTWARE   (NETIF_F_ALL_TSO | \
                                 NETIF_F_GSO_SCTP)
 
 /*
index 779b235..614642e 100644 (file)
@@ -66,6 +66,7 @@ struct mpls_dev;
 /* UDP Tunnel offloads */
 struct udp_tunnel_info;
 struct bpf_prog;
+struct xdp_buff;
 
 void netdev_set_default_ethtool_ops(struct net_device *dev,
                                    const struct ethtool_ops *ops);
@@ -1138,7 +1139,12 @@ struct xfrmdev_ops {
  * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp);
  *     This function is used to set or query state related to XDP on the
  *     netdevice. See definition of enum xdp_netdev_command for details.
- *
+ * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp);
+ *     This function is used to submit an XDP packet for transmit on a
+ *     netdevice.
+ * void (*ndo_xdp_flush)(struct net_device *dev);
+ *     This function is used to inform the driver to flush a particular
+ *     xdp tx queue. Must be called on same CPU as xdp_xmit.
  */
 struct net_device_ops {
        int                     (*ndo_init)(struct net_device *dev);
@@ -1323,6 +1329,9 @@ struct net_device_ops {
                                                       int needed_headroom);
        int                     (*ndo_xdp)(struct net_device *dev,
                                           struct netdev_xdp *xdp);
+       int                     (*ndo_xdp_xmit)(struct net_device *dev,
+                                               struct xdp_buff *xdp);
+       void                    (*ndo_xdp_flush)(struct net_device *dev);
 };
 
 /**
@@ -2423,8 +2432,8 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
 struct net_device *__dev_get_by_name(struct net *net, const char *name);
 int dev_alloc_name(struct net_device *dev, const char *name);
 int dev_open(struct net_device *dev);
-int dev_close(struct net_device *dev);
-int dev_close_many(struct list_head *head, bool unlink);
+void dev_close(struct net_device *dev);
+void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
@@ -4089,7 +4098,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 
        /* check flags correspondence */
        BUILD_BUG_ON(SKB_GSO_TCPV4   != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
-       BUILD_BUG_ON(SKB_GSO_UDP     != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
        BUILD_BUG_ON(SKB_GSO_DODGY   != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
        BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
        BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT));
index 99e8664..913474d 100644 (file)
@@ -273,69 +273,64 @@ struct sctp_init_chunk {
 
 
 /* Section 3.3.2.1. IPv4 Address Parameter (5) */
-typedef struct sctp_ipv4addr_param {
+struct sctp_ipv4addr_param {
        struct sctp_paramhdr param_hdr;
-       struct in_addr  addr;
-} sctp_ipv4addr_param_t;
+       struct in_addr addr;
+};
 
 /* Section 3.3.2.1. IPv6 Address Parameter (6) */
-typedef struct sctp_ipv6addr_param {
+struct sctp_ipv6addr_param {
        struct sctp_paramhdr param_hdr;
        struct in6_addr addr;
-} sctp_ipv6addr_param_t;
+};
 
 /* Section 3.3.2.1 Cookie Preservative (9) */
-typedef struct sctp_cookie_preserve_param {
+struct sctp_cookie_preserve_param {
        struct sctp_paramhdr param_hdr;
-       __be32          lifespan_increment;
-} sctp_cookie_preserve_param_t;
+       __be32 lifespan_increment;
+};
 
 /* Section 3.3.2.1 Host Name Address (11) */
-typedef struct sctp_hostname_param {
+struct sctp_hostname_param {
        struct sctp_paramhdr param_hdr;
        uint8_t hostname[0];
-} sctp_hostname_param_t;
+};
 
 /* Section 3.3.2.1 Supported Address Types (12) */
-typedef struct sctp_supported_addrs_param {
+struct sctp_supported_addrs_param {
        struct sctp_paramhdr param_hdr;
        __be16 types[0];
-} sctp_supported_addrs_param_t;
-
-/* Appendix A. ECN Capable (32768) */
-typedef struct sctp_ecn_capable_param {
-       struct sctp_paramhdr param_hdr;
-} sctp_ecn_capable_param_t;
+};
 
 /* ADDIP Section 3.2.6 Adaptation Layer Indication */
-typedef struct sctp_adaptation_ind_param {
+struct sctp_adaptation_ind_param {
        struct sctp_paramhdr param_hdr;
        __be32 adaptation_ind;
-} sctp_adaptation_ind_param_t;
+};
 
 /* ADDIP Section 4.2.7 Supported Extensions Parameter */
-typedef struct sctp_supported_ext_param {
+struct sctp_supported_ext_param {
        struct sctp_paramhdr param_hdr;
        __u8 chunks[0];
-} sctp_supported_ext_param_t;
+};
 
 /* AUTH Section 3.1 Random */
-typedef struct sctp_random_param {
+struct sctp_random_param {
        struct sctp_paramhdr param_hdr;
        __u8 random_val[0];
-} sctp_random_param_t;
+};
 
 /* AUTH Section 3.2 Chunk List */
-typedef struct sctp_chunks_param {
+struct sctp_chunks_param {
        struct sctp_paramhdr param_hdr;
        __u8 chunks[0];
-} sctp_chunks_param_t;
+};
 
 /* AUTH Section 3.3 HMAC Algorithm */
-typedef struct sctp_hmac_algo_param {
+struct sctp_hmac_algo_param {
        struct sctp_paramhdr param_hdr;
        __be16 hmac_ids[0];
-} sctp_hmac_algo_param_t;
+};
 
 /* RFC 2960.  Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2):
  *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
index dbe29b6..4093552 100644 (file)
@@ -463,39 +463,38 @@ enum {
 
 enum {
        SKB_GSO_TCPV4 = 1 << 0,
-       SKB_GSO_UDP = 1 << 1,
 
        /* This indicates the skb is from an untrusted source. */
-       SKB_GSO_DODGY = 1 << 2,
+       SKB_GSO_DODGY = 1 << 1,
 
        /* This indicates the tcp segment has CWR set. */
-       SKB_GSO_TCP_ECN = 1 << 3,
+       SKB_GSO_TCP_ECN = 1 << 2,
 
-       SKB_GSO_TCP_FIXEDID = 1 << 4,
+       SKB_GSO_TCP_FIXEDID = 1 << 3,
 
-       SKB_GSO_TCPV6 = 1 << 5,
+       SKB_GSO_TCPV6 = 1 << 4,
 
-       SKB_GSO_FCOE = 1 << 6,
+       SKB_GSO_FCOE = 1 << 5,
 
-       SKB_GSO_GRE = 1 << 7,
+       SKB_GSO_GRE = 1 << 6,
 
-       SKB_GSO_GRE_CSUM = 1 << 8,
+       SKB_GSO_GRE_CSUM = 1 << 7,
 
-       SKB_GSO_IPXIP4 = 1 << 9,
+       SKB_GSO_IPXIP4 = 1 << 8,
 
-       SKB_GSO_IPXIP6 = 1 << 10,
+       SKB_GSO_IPXIP6 = 1 << 9,
 
-       SKB_GSO_UDP_TUNNEL = 1 << 11,
+       SKB_GSO_UDP_TUNNEL = 1 << 10,
 
-       SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12,
+       SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
 
-       SKB_GSO_PARTIAL = 1 << 13,
+       SKB_GSO_PARTIAL = 1 << 12,
 
-       SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
+       SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
 
-       SKB_GSO_SCTP = 1 << 15,
+       SKB_GSO_SCTP = 1 << 14,
 
-       SKB_GSO_ESP = 1 << 16,
+       SKB_GSO_ESP = 1 << 15,
 };
 
 #if BITS_PER_LONG > 32
@@ -945,12 +944,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
        return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
 }
 
-struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
-static inline struct sk_buff *alloc_skb_head(gfp_t priority)
-{
-       return __alloc_skb_head(priority, -1);
-}
-
 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
index 5209b5e..32fb046 100644 (file)
@@ -18,9 +18,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
                case VIRTIO_NET_HDR_GSO_TCPV6:
                        gso_type = SKB_GSO_TCPV6;
                        break;
-               case VIRTIO_NET_HDR_GSO_UDP:
-                       gso_type = SKB_GSO_UDP;
-                       break;
                default:
                        return -EINVAL;
                }
@@ -73,8 +70,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
                        hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
                else if (sinfo->gso_type & SKB_GSO_TCPV6)
                        hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
-               else if (sinfo->gso_type & SKB_GSO_UDP)
-                       hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
                else
                        return -EINVAL;
                if (sinfo->gso_type & SKB_GSO_TCP_ECN)
index 53b1a2c..afb37f8 100644 (file)
@@ -58,7 +58,6 @@ struct unix_sock {
        struct list_head        link;
        atomic_long_t           inflight;
        spinlock_t              lock;
-       unsigned char           recursion_level;
        unsigned long           gc_flags;
 #define UNIX_GC_CANDIDATE      0
 #define UNIX_GC_MAYBE_CYCLE    1
index 58969b9..88da272 100644 (file)
@@ -256,11 +256,6 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p)
        return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p);
 }
 
-static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
-{
-       return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
-}
-
 static inline u8 dsa_upstream_port(struct dsa_switch *ds)
 {
        struct dsa_switch_tree *dst = ds->dst;
index bae198b..f3dc61b 100644 (file)
@@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family)
        return 0;
 }
 
-#define FLOW_DIR_IN    0
-#define FLOW_DIR_OUT   1
-#define FLOW_DIR_FWD   2
-
-struct net;
-struct sock;
-struct flow_cache_ops;
-
-struct flow_cache_object {
-       const struct flow_cache_ops *ops;
-};
-
-struct flow_cache_ops {
-       struct flow_cache_object *(*get)(struct flow_cache_object *);
-       int (*check)(struct flow_cache_object *);
-       void (*delete)(struct flow_cache_object *);
-};
-
-typedef struct flow_cache_object *(*flow_resolve_t)(
-               struct net *net, const struct flowi *key, u16 family,
-               u8 dir, struct flow_cache_object *oldobj, void *ctx);
-
-struct flow_cache_object *flow_cache_lookup(struct net *net,
-                                           const struct flowi *key, u16 family,
-                                           u8 dir, flow_resolve_t resolver,
-                                           void *ctx);
-int flow_cache_init(struct net *net);
-void flow_cache_fini(struct net *net);
-void flow_cache_hp_init(void);
-
-void flow_cache_flush(struct net *net);
-void flow_cache_flush_deferred(struct net *net);
-extern atomic_t flow_cache_genid;
-
 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
 
 static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
diff --git a/include/net/flowcache.h b/include/net/flowcache.h
deleted file mode 100644 (file)
index 51eb971..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _NET_FLOWCACHE_H
-#define _NET_FLOWCACHE_H
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/notifier.h>
-
-struct flow_cache_percpu {
-       struct hlist_head               *hash_table;
-       unsigned int                    hash_count;
-       u32                             hash_rnd;
-       int                             hash_rnd_recalc;
-       struct tasklet_struct           flush_tasklet;
-};
-
-struct flow_cache {
-       u32                             hash_shift;
-       struct flow_cache_percpu __percpu *percpu;
-       struct hlist_node               node;
-       unsigned int                    low_watermark;
-       unsigned int                    high_watermark;
-       struct timer_list               rnd_timer;
-};
-#endif /* _NET_FLOWCACHE_H */
index f2a215f..950ed18 100644 (file)
@@ -33,18 +33,12 @@ struct inetpeer_addr {
 };
 
 struct inet_peer {
-       /* group together avl_left,avl_right,v4daddr to speedup lookups */
-       struct inet_peer __rcu  *avl_left, *avl_right;
+       struct rb_node          rb_node;
        struct inetpeer_addr    daddr;
-       __u32                   avl_height;
 
        u32                     metrics[RTAX_MAX];
        u32                     rate_tokens;    /* rate limiting for ICMP */
        unsigned long           rate_last;
-       union {
-               struct list_head        gc_list;
-               struct rcu_head     gc_rcu;
-       };
        /*
         * Once inet_peer is queued for deletion (refcnt == 0), following field
         * is not available: rid
@@ -55,7 +49,6 @@ struct inet_peer {
                        atomic_t                        rid;            /* Frag reception counter */
                };
                struct rcu_head         rcu;
-               struct inet_peer        *gc_next;
        };
 
        /* following fields might be frequently dirtied */
@@ -64,7 +57,7 @@ struct inet_peer {
 };
 
 struct inet_peer_base {
-       struct inet_peer __rcu  *root;
+       struct rb_root          rb_root;
        seqlock_t               lock;
        int                     total;
 };
index 1990569..907d39a 100644 (file)
@@ -194,7 +194,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
        struct rt6_info *rt = (struct rt6_info *)dst;
 
        return rt->rt6i_flags & RTF_ANYCAST ||
-               (rt->rt6i_dst.plen != 128 &&
+               (rt->rt6i_dst.plen < 127 &&
                 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
 }
 
index 27bb963..6115216 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/workqueue.h>
 #include <linux/xfrm.h>
 #include <net/dst_ops.h>
-#include <net/flowcache.h>
 
 struct ctl_table_header;
 
@@ -73,16 +72,6 @@ struct netns_xfrm {
        spinlock_t xfrm_state_lock;
        spinlock_t xfrm_policy_lock;
        struct mutex xfrm_cfg_mutex;
-
-       /* flow cache part */
-       struct flow_cache       flow_cache_global;
-       atomic_t                flow_cache_genid;
-       struct list_head        flow_cache_gc_list;
-       atomic_t                flow_cache_gc_count;
-       spinlock_t              flow_cache_gc_lock;
-       struct work_struct      flow_cache_gc_work;
-       struct work_struct      flow_cache_flush_work;
-       struct mutex            flow_flush_sem;
 };
 
 #endif
index 5ab29af..66cd763 100644 (file)
@@ -1556,9 +1556,9 @@ struct sctp_association {
                 * and authenticated chunk list.  All that is part of the
                 * cookie and these are just pointers to those locations
                 */
-               sctp_random_param_t *peer_random;
-               sctp_chunks_param_t *peer_chunks;
-               sctp_hmac_algo_param_t *peer_hmacs;
+               struct sctp_random_param *peer_random;
+               struct sctp_chunks_param *peer_chunks;
+               struct sctp_hmac_algo_param *peer_hmacs;
        } peer;
 
        /* State       : A state variable indicating what state the
index 7048329..4f056ea 100644 (file)
@@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #endif
 #define TCP_RTO_MAX    ((unsigned)(120*HZ))
 #define TCP_RTO_MIN    ((unsigned)(HZ/5))
+#define TCP_TIMEOUT_MIN        (2U) /* Min timeout for TCP timers in jiffies */
 #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))    /* RFC6298 2.1 initial RTO value        */
 #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))        /* RFC 1122 initial RTO value, now
                                                 * used as a fallback RTO for the
@@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
                                                         * for local resources.
                                                         */
-#define TCP_REO_TIMEOUT_MIN    (2000) /* Min RACK reordering timeout in usec */
-
 #define TCP_KEEPALIVE_TIME     (120*60*HZ)     /* two hours */
 #define TCP_KEEPALIVE_PROBES   9               /* Max of 9 keepalive probes    */
 #define TCP_KEEPALIVE_INTVL    (75*HZ)
index c0916ab..afb4929 100644 (file)
@@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
                      const struct km_event *c);
+void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
@@ -563,7 +564,6 @@ struct xfrm_policy {
        refcount_t              refcnt;
        struct timer_list       timer;
 
-       struct flow_cache_object flo;
        atomic_t                genid;
        u32                     priority;
        u32                     index;
@@ -978,7 +978,6 @@ struct xfrm_dst {
                struct rt6_info         rt6;
        } u;
        struct dst_entry *route;
-       struct flow_cache_object flo;
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        int num_pols, num_xfrms;
        u32 xfrm_genid;
@@ -1226,9 +1225,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
        }
 }
 
-void xfrm_garbage_collect(struct net *net);
-void xfrm_garbage_collect_deferred(struct net *net);
-
 #else
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
@@ -1263,9 +1259,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
 {
        return 1;
 }
-static inline void xfrm_garbage_collect(struct net *net)
-{
-}
 #endif
 
 static __inline__
index 1b61357..7b1eb7b 100644 (file)
@@ -12,7 +12,8 @@
        FN(ABORTED)             \
        FN(DROP)                \
        FN(PASS)                \
-       FN(TX)
+       FN(TX)                  \
+       FN(REDIRECT)
 
 #define __XDP_ACT_TP_FN(x)     \
        TRACE_DEFINE_ENUM(XDP_##x);
@@ -48,6 +49,34 @@ TRACE_EVENT(xdp_exception,
                  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB))
 );
 
+TRACE_EVENT(xdp_redirect,
+
+       TP_PROTO(const struct net_device *from,
+                const struct net_device *to,
+                const struct bpf_prog *xdp, u32 act),
+
+       TP_ARGS(from, to, xdp, act),
+
+       TP_STRUCT__entry(
+               __string(name_from, from->name)
+               __string(name_to, to->name)
+               __array(u8, prog_tag, 8)
+               __field(u32, act)
+       ),
+
+       TP_fast_assign(
+               BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag));
+               memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag));
+               __assign_str(name_from, from->name);
+               __assign_str(name_to, to->name);
+               __entry->act = act;
+       ),
+
+       TP_printk("prog=%s from=%s to=%s action=%s",
+                 __print_hex_str(__entry->prog_tag, 8),
+                 __get_str(name_from), __get_str(name_to),
+                 __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB))
+);
 #endif /* _TRACE_XDP_H */
 
 #include <trace/define_trace.h>
index e99e3e6..1106a8c 100644 (file)
@@ -104,6 +104,7 @@ enum bpf_map_type {
        BPF_MAP_TYPE_LPM_TRIE,
        BPF_MAP_TYPE_ARRAY_OF_MAPS,
        BPF_MAP_TYPE_HASH_OF_MAPS,
+       BPF_MAP_TYPE_DEVMAP,
 };
 
 enum bpf_prog_type {
@@ -347,6 +348,11 @@ union bpf_attr {
  *     @flags: bit 0 - if set, redirect to ingress instead of egress
  *             other bits - reserved
  *     Return: TC_ACT_REDIRECT
+ * int bpf_redirect_map(key, map, flags)
+ *     redirect to endpoint in map
+ *     @key: index in map to lookup
+ *     @map: fd of map to do lookup in
+ *     @flags: --
  *
  * u32 bpf_get_route_realm(skb)
  *     retrieve a dst's tclassid
@@ -591,7 +597,8 @@ union bpf_attr {
        FN(get_socket_uid),             \
        FN(set_hash),                   \
        FN(setsockopt),                 \
-       FN(skb_adjust_room),
+       FN(skb_adjust_room),            \
+       FN(redirect_map),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -717,6 +724,7 @@ enum xdp_action {
        XDP_DROP,
        XDP_PASS,
        XDP_TX,
+       XDP_REDIRECT,
 };
 
 /* user accessible metadata for XDP packet hook
index e1e5e65..48e9270 100644 (file)
@@ -2,6 +2,9 @@ obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+ifeq ($(CONFIG_NET),y)
+obj-$(CONFIG_BPF_SYSCALL) += devmap.o
+endif
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
new file mode 100644 (file)
index 0000000..899364d
--- /dev/null
@@ -0,0 +1,431 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+/* Devmap's primary use is as a backend map for XDP BPF helper call
+ * bpf_redirect_map(). Because XDP is mostly concerned with performance we
+ * spent some effort to ensure the datapath with redirect maps does not use
+ * any locking. This is a quick note on the details.
+ *
+ * We have three possible paths to get into the devmap control plane bpf
+ * syscalls, bpf programs, and driver side xmit/flush operations. A bpf syscall
+ * will invoke an update, delete, or lookup operation. To ensure updates and
+ * deletes appear atomic from the datapath side xchg() is used to modify the
+ * netdev_map array. Then because the datapath does a lookup into the netdev_map
+ * array (read-only) from an RCU critical section we use call_rcu() to wait for
+ * an rcu grace period before free'ing the old data structures. This ensures the
+ * datapath always has a valid copy. However, the datapath does a "flush"
+ * operation that pushes any pending packets in the driver outside the RCU
+ * critical section. Each bpf_dtab_netdev tracks these pending operations using
+ * an atomic per-cpu bitmap. The bpf_dtab_netdev object will not be destroyed
+ * until all bits are cleared indicating outstanding flush operations have
+ * completed.
+ *
+ * BPF syscalls may race with BPF program calls on any of the update, delete
+ * or lookup operations. As noted above the xchg() operation also keeps the
+ * netdev_map consistent in this case. From the devmap side BPF programs
+ * calling into these operations are the same as multiple user space threads
+ * making system calls.
+ *
+ * Finally, any of the above may race with a netdev_unregister notifier. The
+ * unregister notifier must search for net devices in the map structure that
+ * contain a reference to the net device and remove them. This is a two step
+ * process (a) dereference the bpf_dtab_netdev object in netdev_map and (b)
+ * check to see if the ifindex is the same as the net_device being removed.
+ * Unfortunately, the xchg() operations do not protect against this. To avoid
+ * potentially removing incorrect objects the dev_map_list_mutex protects
+ * conflicting netdev unregister and BPF syscall operations. Updates and
+ * deletes from a BPF program (done in rcu critical section) are blocked
+ * because of this mutex.
+ */
+#include <linux/bpf.h>
+#include <linux/jhash.h>
+#include <linux/filter.h>
+#include <linux/rculist_nulls.h>
+#include "percpu_freelist.h"
+#include "bpf_lru_list.h"
+#include "map_in_map.h"
+
+struct bpf_dtab_netdev {
+       struct net_device *dev;
+       int key;
+       struct rcu_head rcu;
+       struct bpf_dtab *dtab;
+};
+
+struct bpf_dtab {
+       struct bpf_map map;
+       struct bpf_dtab_netdev **netdev_map;
+       unsigned long int __percpu *flush_needed;
+       struct list_head list;
+};
+
+static DEFINE_MUTEX(dev_map_list_mutex);
+static LIST_HEAD(dev_map_list);
+
+static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+{
+       struct bpf_dtab *dtab;
+       u64 cost;
+       int err;
+
+       /* check sanity of attributes */
+       if (attr->max_entries == 0 || attr->key_size != 4 ||
+           attr->value_size != 4 || attr->map_flags)
+               return ERR_PTR(-EINVAL);
+
+       /* if value_size is bigger, the user space won't be able to
+        * access the elements.
+        */
+       if (attr->value_size > KMALLOC_MAX_SIZE)
+               return ERR_PTR(-E2BIG);
+
+       dtab = kzalloc(sizeof(*dtab), GFP_USER);
+       if (!dtab)
+               return ERR_PTR(-ENOMEM);
+
+       /* mandatory map attributes */
+       dtab->map.map_type = attr->map_type;
+       dtab->map.key_size = attr->key_size;
+       dtab->map.value_size = attr->value_size;
+       dtab->map.max_entries = attr->max_entries;
+       dtab->map.map_flags = attr->map_flags;
+
+       err = -ENOMEM;
+
+       /* make sure page count doesn't overflow */
+       cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
+       cost += BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+       if (cost >= U32_MAX - PAGE_SIZE)
+               goto free_dtab;
+
+       dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+       /* if map size is larger than memlock limit, reject it early */
+       err = bpf_map_precharge_memlock(dtab->map.pages);
+       if (err)
+               goto free_dtab;
+
+       /* A per cpu bitfield with a bit per possible net device */
+       dtab->flush_needed = __alloc_percpu(
+                               BITS_TO_LONGS(attr->max_entries) *
+                               sizeof(unsigned long),
+                               __alignof__(unsigned long));
+       if (!dtab->flush_needed)
+               goto free_dtab;
+
+       dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
+                                             sizeof(struct bpf_dtab_netdev *));
+       if (!dtab->netdev_map)
+               goto free_dtab;
+
+       mutex_lock(&dev_map_list_mutex);
+       list_add_tail(&dtab->list, &dev_map_list);
+       mutex_unlock(&dev_map_list_mutex);
+       return &dtab->map;
+
+free_dtab:
+       free_percpu(dtab->flush_needed);
+       kfree(dtab);
+       return ERR_PTR(err);
+}
+
+static void dev_map_free(struct bpf_map *map)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       int i, cpu;
+
+       /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+        * so the programs (can be more than one that used this map) were
+        * disconnected from events. Wait for outstanding critical sections in
+        * these programs to complete. The rcu critical section only guarantees
+        * no further reads against netdev_map. It does __not__ ensure pending
+        * flush operations (if any) are complete.
+        */
+       synchronize_rcu();
+
+       /* To ensure all pending flush operations have completed wait for flush
+        * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
+        * Because the above synchronize_rcu() ensures the map is disconnected
+        * from the program we can assume no new bits will be set.
+        */
+       for_each_online_cpu(cpu) {
+               unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu);
+
+               while (!bitmap_empty(bitmap, dtab->map.max_entries))
+                       cpu_relax();
+       }
+
+       /* Although we should no longer have datapath or bpf syscall operations
+        * at this point we can still race with netdev notifier, hence the
+        * lock.
+        */
+       mutex_lock(&dev_map_list_mutex);
+       for (i = 0; i < dtab->map.max_entries; i++) {
+               struct bpf_dtab_netdev *dev;
+
+               dev = dtab->netdev_map[i];
+               if (!dev)
+                       continue;
+
+               dev_put(dev->dev);
+               kfree(dev);
+       }
+
+       /* At this point bpf program is detached and all pending operations
+        * _must_ be complete
+        */
+       list_del(&dtab->list);
+       mutex_unlock(&dev_map_list_mutex);
+       free_percpu(dtab->flush_needed);
+       bpf_map_area_free(dtab->netdev_map);
+       kfree(dtab);
+}
+
+static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       u32 index = key ? *(u32 *)key : U32_MAX;
+       u32 *next = (u32 *)next_key;
+
+       if (index >= dtab->map.max_entries) {
+               *next = 0;
+               return 0;
+       }
+
+       if (index == dtab->map.max_entries - 1)
+               return -ENOENT;
+
+       *next = index + 1;
+       return 0;
+}
+
+void __dev_map_insert_ctx(struct bpf_map *map, u32 key)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+
+       __set_bit(key, bitmap);
+}
+
+struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       struct bpf_dtab_netdev *dev;
+
+       if (key >= map->max_entries)
+               return NULL;
+
+       dev = READ_ONCE(dtab->netdev_map[key]);
+       return dev ? dev->dev : NULL;
+}
+
+/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
+ * from the driver before returning from its napi->poll() routine. The poll()
+ * routine is called either from busy_poll context or net_rx_action signaled
+ * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
+ * net device can be torn down. On devmap tear down we ensure the ctx bitmap
+ * is zeroed before completing to ensure all flush operations have completed.
+ */
+void __dev_map_flush(struct bpf_map *map)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+       u32 bit;
+
+       for_each_set_bit(bit, bitmap, map->max_entries) {
+               struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+               struct net_device *netdev;
+
+               /* This is possible if the dev entry is removed by user space
+                * between xdp redirect and flush op.
+                */
+               if (unlikely(!dev))
+                       continue;
+
+               netdev = dev->dev;
+
+               __clear_bit(bit, bitmap);
+               if (unlikely(!netdev || !netdev->netdev_ops->ndo_xdp_flush))
+                       continue;
+
+               netdev->netdev_ops->ndo_xdp_flush(netdev);
+       }
+}
+
+/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
+ * update happens in parallel here a dev_put won't happen until after reading the
+ * ifindex.
+ */
+static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       struct bpf_dtab_netdev *dev;
+       u32 i = *(u32 *)key;
+
+       if (i >= map->max_entries)
+               return NULL;
+
+       dev = READ_ONCE(dtab->netdev_map[i]);
+       return dev ? &dev->dev->ifindex : NULL;
+}
+
+static void dev_map_flush_old(struct bpf_dtab_netdev *old_dev)
+{
+       if (old_dev->dev->netdev_ops->ndo_xdp_flush) {
+               struct net_device *fl = old_dev->dev;
+               unsigned long *bitmap;
+               int cpu;
+
+               for_each_online_cpu(cpu) {
+                       bitmap = per_cpu_ptr(old_dev->dtab->flush_needed, cpu);
+                       __clear_bit(old_dev->key, bitmap);
+
+                       fl->netdev_ops->ndo_xdp_flush(old_dev->dev);
+               }
+       }
+}
+
+static void __dev_map_entry_free(struct rcu_head *rcu)
+{
+       struct bpf_dtab_netdev *old_dev;
+
+       old_dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+       dev_map_flush_old(old_dev);
+       dev_put(old_dev->dev);
+       kfree(old_dev);
+}
+
+static int dev_map_delete_elem(struct bpf_map *map, void *key)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       struct bpf_dtab_netdev *old_dev;
+       int k = *(u32 *)key;
+
+       if (k >= map->max_entries)
+               return -EINVAL;
+
+       /* Use synchronize_rcu() here to ensure any rcu critical sections
+        * have completed, but this does not guarantee a flush has happened
+        * yet. Because driver side rcu_read_lock/unlock only protects the
+        * running XDP program. However, for pending flush operations the
+        * dev and ctx are stored in another per cpu map. And additionally,
+        * the driver tear down ensures all soft irqs are complete before
+        * removing the net device in the case of dev_put equals zero.
+        */
+       mutex_lock(&dev_map_list_mutex);
+       old_dev = xchg(&dtab->netdev_map[k], NULL);
+       if (old_dev)
+               call_rcu(&old_dev->rcu, __dev_map_entry_free);
+       mutex_unlock(&dev_map_list_mutex);
+       return 0;
+}
+
+static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
+                               u64 map_flags)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       struct net *net = current->nsproxy->net_ns;
+       struct bpf_dtab_netdev *dev, *old_dev;
+       u32 i = *(u32 *)key;
+       u32 ifindex = *(u32 *)value;
+
+       if (unlikely(map_flags > BPF_EXIST))
+               return -EINVAL;
+
+       if (unlikely(i >= dtab->map.max_entries))
+               return -E2BIG;
+
+       if (unlikely(map_flags == BPF_NOEXIST))
+               return -EEXIST;
+
+       if (!ifindex) {
+               dev = NULL;
+       } else {
+               dev = kmalloc(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN);
+               if (!dev)
+                       return -ENOMEM;
+
+               dev->dev = dev_get_by_index(net, ifindex);
+               if (!dev->dev) {
+                       kfree(dev);
+                       return -EINVAL;
+               }
+
+               dev->key = i;
+               dev->dtab = dtab;
+       }
+
+       /* Use call_rcu() here to ensure rcu critical sections have completed
+        * Remembering the driver side flush operation will happen before the
+        * net device is removed.
+        */
+       mutex_lock(&dev_map_list_mutex);
+       old_dev = xchg(&dtab->netdev_map[i], dev);
+       if (old_dev)
+               call_rcu(&old_dev->rcu, __dev_map_entry_free);
+       mutex_unlock(&dev_map_list_mutex);
+
+       return 0;
+}
+
+const struct bpf_map_ops dev_map_ops = {
+       .map_alloc = dev_map_alloc,
+       .map_free = dev_map_free,
+       .map_get_next_key = dev_map_get_next_key,
+       .map_lookup_elem = dev_map_lookup_elem,
+       .map_update_elem = dev_map_update_elem,
+       .map_delete_elem = dev_map_delete_elem,
+};
+
+static int dev_map_notification(struct notifier_block *notifier,
+                               ulong event, void *ptr)
+{
+       struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+       struct bpf_dtab *dtab;
+       int i;
+
+       switch (event) {
+       case NETDEV_UNREGISTER:
+               mutex_lock(&dev_map_list_mutex);
+               list_for_each_entry(dtab, &dev_map_list, list) {
+                       for (i = 0; i < dtab->map.max_entries; i++) {
+                               struct bpf_dtab_netdev *dev;
+
+                               dev = dtab->netdev_map[i];
+                               if (!dev ||
+                                   dev->dev->ifindex != netdev->ifindex)
+                                       continue;
+                               dev = xchg(&dtab->netdev_map[i], NULL);
+                               if (dev)
+                                       call_rcu(&dev->rcu,
+                                                __dev_map_entry_free);
+                       }
+               }
+               mutex_unlock(&dev_map_list_mutex);
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block dev_map_notifier = {
+       .notifier_call = dev_map_notification,
+};
+
+static int __init dev_map_init(void)
+{
+       register_netdevice_notifier(&dev_map_notifier);
+       return 0;
+}
+
+subsys_initcall(dev_map_init);
index af9e84a..db6a289 100644 (file)
@@ -1283,6 +1283,14 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                    func_id != BPF_FUNC_current_task_under_cgroup)
                        goto error;
                break;
+       /* devmap returns a pointer to a live net_device ifindex that we cannot
+        * allow to be modified from bpf side. So do not allow lookup elements
+        * for now.
+        */
+       case BPF_MAP_TYPE_DEVMAP:
+               if (func_id != BPF_FUNC_redirect_map)
+                       goto error;
+               break;
        case BPF_MAP_TYPE_ARRAY_OF_MAPS:
        case BPF_MAP_TYPE_HASH_OF_MAPS:
                if (func_id != BPF_FUNC_map_lookup_elem)
@@ -1311,6 +1319,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
                        goto error;
                break;
+       case BPF_FUNC_redirect_map:
+               if (map->map_type != BPF_MAP_TYPE_DEVMAP)
+                       goto error;
+               break;
        default:
                break;
        }
index ab3b654..2af4f1c 100644 (file)
@@ -618,12 +618,8 @@ static void ifup(struct net_device *netdev)
 
 static void ifdown(struct net_device *netdev)
 {
-       int err;
-
        rtnl_lock();
-       err = dev_close(netdev);
-       if (err < 0)
-               BT_INFO("iface %s cannot be closed (%d)", netdev->name, err);
+       dev_close(netdev);
        rtnl_unlock();
 }
 
index 79f9479..d501c42 100644 (file)
@@ -11,7 +11,6 @@ obj-y              += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
                        neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
                        sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
 
-obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
index 8515f8f..509af6c 100644 (file)
@@ -1413,7 +1413,7 @@ int dev_open(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_open);
 
-static int __dev_close_many(struct list_head *head)
+static void __dev_close_many(struct list_head *head)
 {
        struct net_device *dev;
 
@@ -1455,23 +1455,18 @@ static int __dev_close_many(struct list_head *head)
                dev->flags &= ~IFF_UP;
                netpoll_poll_enable(dev);
        }
-
-       return 0;
 }
 
-static int __dev_close(struct net_device *dev)
+static void __dev_close(struct net_device *dev)
 {
-       int retval;
        LIST_HEAD(single);
 
        list_add(&dev->close_list, &single);
-       retval = __dev_close_many(&single);
+       __dev_close_many(&single);
        list_del(&single);
-
-       return retval;
 }
 
-int dev_close_many(struct list_head *head, bool unlink)
+void dev_close_many(struct list_head *head, bool unlink)
 {
        struct net_device *dev, *tmp;
 
@@ -1488,8 +1483,6 @@ int dev_close_many(struct list_head *head, bool unlink)
                if (unlink)
                        list_del_init(&dev->close_list);
        }
-
-       return 0;
 }
 EXPORT_SYMBOL(dev_close_many);
 
@@ -1502,7 +1495,7 @@ EXPORT_SYMBOL(dev_close_many);
  *     is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
  *     chain.
  */
-int dev_close(struct net_device *dev)
+void dev_close(struct net_device *dev)
 {
        if (dev->flags & IFF_UP) {
                LIST_HEAD(single);
@@ -1511,7 +1504,6 @@ int dev_close(struct net_device *dev)
                dev_close_many(&single, true);
                list_del(&single);
        }
-       return 0;
 }
 EXPORT_SYMBOL(dev_close);
 
@@ -3865,6 +3857,121 @@ drop:
        return NET_RX_DROP;
 }
 
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+                                    struct bpf_prog *xdp_prog)
+{
+       struct xdp_buff xdp;
+       u32 act = XDP_DROP;
+       void *orig_data;
+       int hlen, off;
+       u32 mac_len;
+
+       /* Reinjected packets coming from act_mirred or similar should
+        * not get XDP generic processing.
+        */
+       if (skb_cloned(skb))
+               return XDP_PASS;
+
+       if (skb_linearize(skb))
+               goto do_drop;
+
+       /* The XDP program wants to see the packet starting at the MAC
+        * header.
+        */
+       mac_len = skb->data - skb_mac_header(skb);
+       hlen = skb_headlen(skb) + mac_len;
+       xdp.data = skb->data - mac_len;
+       xdp.data_end = xdp.data + hlen;
+       xdp.data_hard_start = skb->data - skb_headroom(skb);
+       orig_data = xdp.data;
+
+       act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+       off = xdp.data - orig_data;
+       if (off > 0)
+               __skb_pull(skb, off);
+       else if (off < 0)
+               __skb_push(skb, -off);
+
+       switch (act) {
+       case XDP_REDIRECT:
+       case XDP_TX:
+               __skb_push(skb, mac_len);
+               /* fall through */
+       case XDP_PASS:
+               break;
+
+       default:
+               bpf_warn_invalid_xdp_action(act);
+               /* fall through */
+       case XDP_ABORTED:
+               trace_xdp_exception(skb->dev, xdp_prog, act);
+               /* fall through */
+       case XDP_DROP:
+       do_drop:
+               kfree_skb(skb);
+               break;
+       }
+
+       return act;
+}
+
+/* When doing generic XDP we have to bypass the qdisc layer and the
+ * network taps in order to match in-driver-XDP behavior.
+ */
+static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+{
+       struct net_device *dev = skb->dev;
+       struct netdev_queue *txq;
+       bool free_skb = true;
+       int cpu, rc;
+
+       txq = netdev_pick_tx(dev, skb, NULL);
+       cpu = smp_processor_id();
+       HARD_TX_LOCK(dev, txq, cpu);
+       if (!netif_xmit_stopped(txq)) {
+               rc = netdev_start_xmit(skb, dev, txq, 0);
+               if (dev_xmit_complete(rc))
+                       free_skb = false;
+       }
+       HARD_TX_UNLOCK(dev, txq);
+       if (free_skb) {
+               trace_xdp_exception(dev, xdp_prog, XDP_TX);
+               kfree_skb(skb);
+       }
+}
+
+static struct static_key generic_xdp_needed __read_mostly;
+
+static int do_xdp_generic(struct sk_buff *skb)
+{
+       struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+
+       if (xdp_prog) {
+               u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+               int err;
+
+               if (act != XDP_PASS) {
+                       switch (act) {
+                       case XDP_REDIRECT:
+                               err = xdp_do_generic_redirect(skb->dev, skb);
+                               if (err)
+                                       goto out_redir;
+                       /* fallthru to submit skb */
+                       case XDP_TX:
+                               generic_xdp_tx(skb, xdp_prog);
+                               break;
+                       }
+                       return XDP_DROP;
+               }
+       }
+       return XDP_PASS;
+out_redir:
+       trace_xdp_exception(skb->dev, xdp_prog, XDP_REDIRECT);
+       kfree_skb(skb);
+       return XDP_DROP;
+}
+
 static int netif_rx_internal(struct sk_buff *skb)
 {
        int ret;
@@ -3872,6 +3979,18 @@ static int netif_rx_internal(struct sk_buff *skb)
        net_timestamp_check(netdev_tstamp_prequeue, skb);
 
        trace_netif_rx(skb);
+
+       if (static_key_false(&generic_xdp_needed)) {
+               int ret = do_xdp_generic(skb);
+
+               /* Consider XDP consuming the packet a success from
+                * the netdev point of view we do not want to count
+                * this as an error.
+                */
+               if (ret != XDP_PASS)
+                       return NET_RX_SUCCESS;
+       }
+
 #ifdef CONFIG_RPS
        if (static_key_false(&rps_needed)) {
                struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4338,8 +4457,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
        return ret;
 }
 
-static struct static_key generic_xdp_needed __read_mostly;
-
 static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
 {
        struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
@@ -4373,89 +4490,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
        return ret;
 }
 
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
-                                    struct bpf_prog *xdp_prog)
-{
-       struct xdp_buff xdp;
-       u32 act = XDP_DROP;
-       void *orig_data;
-       int hlen, off;
-       u32 mac_len;
-
-       /* Reinjected packets coming from act_mirred or similar should
-        * not get XDP generic processing.
-        */
-       if (skb_cloned(skb))
-               return XDP_PASS;
-
-       if (skb_linearize(skb))
-               goto do_drop;
-
-       /* The XDP program wants to see the packet starting at the MAC
-        * header.
-        */
-       mac_len = skb->data - skb_mac_header(skb);
-       hlen = skb_headlen(skb) + mac_len;
-       xdp.data = skb->data - mac_len;
-       xdp.data_end = xdp.data + hlen;
-       xdp.data_hard_start = skb->data - skb_headroom(skb);
-       orig_data = xdp.data;
-
-       act = bpf_prog_run_xdp(xdp_prog, &xdp);
-
-       off = xdp.data - orig_data;
-       if (off > 0)
-               __skb_pull(skb, off);
-       else if (off < 0)
-               __skb_push(skb, -off);
-
-       switch (act) {
-       case XDP_TX:
-               __skb_push(skb, mac_len);
-               /* fall through */
-       case XDP_PASS:
-               break;
-
-       default:
-               bpf_warn_invalid_xdp_action(act);
-               /* fall through */
-       case XDP_ABORTED:
-               trace_xdp_exception(skb->dev, xdp_prog, act);
-               /* fall through */
-       case XDP_DROP:
-       do_drop:
-               kfree_skb(skb);
-               break;
-       }
-
-       return act;
-}
-
-/* When doing generic XDP we have to bypass the qdisc layer and the
- * network taps in order to match in-driver-XDP behavior.
- */
-static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
-{
-       struct net_device *dev = skb->dev;
-       struct netdev_queue *txq;
-       bool free_skb = true;
-       int cpu, rc;
-
-       txq = netdev_pick_tx(dev, skb, NULL);
-       cpu = smp_processor_id();
-       HARD_TX_LOCK(dev, txq, cpu);
-       if (!netif_xmit_stopped(txq)) {
-               rc = netdev_start_xmit(skb, dev, txq, 0);
-               if (dev_xmit_complete(rc))
-                       free_skb = false;
-       }
-       HARD_TX_UNLOCK(dev, txq);
-       if (free_skb) {
-               trace_xdp_exception(dev, xdp_prog, XDP_TX);
-               kfree_skb(skb);
-       }
-}
-
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
        int ret;
@@ -4468,17 +4502,11 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
        rcu_read_lock();
 
        if (static_key_false(&generic_xdp_needed)) {
-               struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
-
-               if (xdp_prog) {
-                       u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+               int ret = do_xdp_generic(skb);
 
-                       if (act != XDP_PASS) {
-                               rcu_read_unlock();
-                               if (act == XDP_TX)
-                                       generic_xdp_tx(skb, xdp_prog);
-                               return NET_RX_DROP;
-                       }
+               if (ret != XDP_PASS) {
+                       rcu_read_unlock();
+                       return NET_RX_DROP;
                }
        }
 
@@ -6689,8 +6717,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
         */
 
        ret = 0;
-       if ((old_flags ^ flags) & IFF_UP)
-               ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
+       if ((old_flags ^ flags) & IFF_UP) {
+               if (old_flags & IFF_UP)
+                       __dev_close(dev);
+               else
+                       ret = __dev_open(dev);
+       }
 
        if ((flags ^ dev->gflags) & IFF_PROMISC) {
                int inc = (flags & IFF_PROMISC) ? 1 : -1;
@@ -7235,24 +7267,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
                features &= ~NETIF_F_GSO;
        }
 
-       /* UFO needs SG and checksumming */
-       if (features & NETIF_F_UFO) {
-               /* maybe split UFO into V4 and V6? */
-               if (!(features & NETIF_F_HW_CSUM) &&
-                   ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
-                    (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
-                       netdev_dbg(dev,
-                               "Dropping NETIF_F_UFO since no checksum offload features.\n");
-                       features &= ~NETIF_F_UFO;
-               }
-
-               if (!(features & NETIF_F_SG)) {
-                       netdev_dbg(dev,
-                               "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
-                       features &= ~NETIF_F_UFO;
-               }
-       }
-
        /* GSO partial features require GSO partial be set */
        if ((features & dev->gso_partial_features) &&
            !(features & NETIF_F_GSO_PARTIAL)) {
index 674b6c9..78408ab 100644 (file)
@@ -76,7 +76,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
        [NETIF_F_LRO_BIT] =              "rx-lro",
 
        [NETIF_F_TSO_BIT] =              "tx-tcp-segmentation",
-       [NETIF_F_UFO_BIT] =              "tx-udp-fragmentation",
        [NETIF_F_GSO_ROBUST_BIT] =       "tx-gso-robust",
        [NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
        [NETIF_F_TSO_MANGLEID_BIT] =     "tx-tcp-mangleid-segmentation",
@@ -299,9 +298,6 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
        case ETHTOOL_GTSO:
        case ETHTOOL_STSO:
                return NETIF_F_ALL_TSO;
-       case ETHTOOL_GUFO:
-       case ETHTOOL_SUFO:
-               return NETIF_F_UFO;
        case ETHTOOL_GGSO:
        case ETHTOOL_SGSO:
                return NETIF_F_GSO;
@@ -2555,7 +2551,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
        case ETHTOOL_GPHYSTATS:
        case ETHTOOL_GTSO:
        case ETHTOOL_GPERMADDR:
-       case ETHTOOL_GUFO:
        case ETHTOOL_GGSO:
        case ETHTOOL_GGRO:
        case ETHTOOL_GFLAGS:
@@ -2723,7 +2718,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
        case ETHTOOL_GRXCSUM:
        case ETHTOOL_GSG:
        case ETHTOOL_GTSO:
-       case ETHTOOL_GUFO:
        case ETHTOOL_GGSO:
        case ETHTOOL_GGRO:
                rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
@@ -2732,7 +2726,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
        case ETHTOOL_SRXCSUM:
        case ETHTOOL_SSG:
        case ETHTOOL_STSO:
-       case ETHTOOL_SUFO:
        case ETHTOOL_SGSO:
        case ETHTOOL_SGRO:
                rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
index f44fc22..7e97086 100644 (file)
@@ -55,6 +55,7 @@
 #include <net/sock_reuseport.h>
 #include <net/busy_poll.h>
 #include <net/tcp.h>
+#include <linux/bpf_trace.h>
 
 /**
  *     sk_filter_trim_cap - run a packet through a socket filter
@@ -1778,6 +1779,8 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 struct redirect_info {
        u32 ifindex;
        u32 flags;
+       struct bpf_map *map;
+       struct bpf_map *map_to_flush;
 };
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -1791,6 +1794,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 
        ri->ifindex = ifindex;
        ri->flags = flags;
+       ri->map = NULL;
 
        return TC_ACT_REDIRECT;
 }
@@ -1818,6 +1822,29 @@ static const struct bpf_func_proto bpf_redirect_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_3(bpf_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+       if (unlikely(flags))
+               return XDP_ABORTED;
+
+       ri->ifindex = ifindex;
+       ri->flags = flags;
+       ri->map = map;
+
+       return XDP_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_redirect_map_proto = {
+       .func           = bpf_redirect_map,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_CONST_MAP_PTR,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
        return task_get_classid(skb);
@@ -2024,8 +2051,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
                return ret;
 
        if (skb_is_gso(skb)) {
-               /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
-                * be changed into SKB_GSO_TCPV6.
+               /* SKB_GSO_TCPV4 needs to be changed into
+                * SKB_GSO_TCPV6.
                 */
                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
                        skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
@@ -2060,8 +2087,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
                return ret;
 
        if (skb_is_gso(skb)) {
-               /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
-                * be changed into SKB_GSO_TCPV4.
+               /* SKB_GSO_TCPV6 needs to be changed into
+                * SKB_GSO_TCPV4.
                 */
                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
                        skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
@@ -2412,6 +2439,140 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+static int __bpf_tx_xdp(struct net_device *dev,
+                       struct bpf_map *map,
+                       struct xdp_buff *xdp,
+                       u32 index)
+{
+       int err;
+
+       if (!dev->netdev_ops->ndo_xdp_xmit) {
+               bpf_warn_invalid_xdp_redirect(dev->ifindex);
+               return -EOPNOTSUPP;
+       }
+
+       err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+       if (err)
+               return err;
+
+       if (map)
+               __dev_map_insert_ctx(map, index);
+       else
+               dev->netdev_ops->ndo_xdp_flush(dev);
+
+       return err;
+}
+
+void xdp_do_flush_map(void)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+       struct bpf_map *map = ri->map_to_flush;
+
+       ri->map = NULL;
+       ri->map_to_flush = NULL;
+
+       if (map)
+               __dev_map_flush(map);
+}
+EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+
+int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
+                       struct bpf_prog *xdp_prog)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+       struct bpf_map *map = ri->map;
+       u32 index = ri->ifindex;
+       struct net_device *fwd;
+       int err = -EINVAL;
+
+       ri->ifindex = 0;
+       ri->map = NULL;
+
+       fwd = __dev_map_lookup_elem(map, index);
+       if (!fwd)
+               goto out;
+
+       if (ri->map_to_flush && (ri->map_to_flush != map))
+               xdp_do_flush_map();
+
+       err = __bpf_tx_xdp(fwd, map, xdp, index);
+       if (likely(!err))
+               ri->map_to_flush = map;
+
+out:
+       trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT);
+       return err;
+}
+
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+                   struct bpf_prog *xdp_prog)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+       struct net_device *fwd;
+
+       if (ri->map)
+               return xdp_do_redirect_map(dev, xdp, xdp_prog);
+
+       fwd = dev_get_by_index_rcu(dev_net(dev), ri->ifindex);
+       ri->ifindex = 0;
+       ri->map = NULL;
+       if (unlikely(!fwd)) {
+               bpf_warn_invalid_xdp_redirect(ri->ifindex);
+               return -EINVAL;
+       }
+
+       trace_xdp_redirect(dev, fwd, xdp_prog, XDP_REDIRECT);
+
+       return __bpf_tx_xdp(fwd, NULL, xdp, 0);
+}
+EXPORT_SYMBOL_GPL(xdp_do_redirect);
+
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+       unsigned int len;
+
+       dev = dev_get_by_index_rcu(dev_net(dev), ri->ifindex);
+       ri->ifindex = 0;
+       if (unlikely(!dev)) {
+               bpf_warn_invalid_xdp_redirect(ri->ifindex);
+               goto err;
+       }
+
+       if (unlikely(!(dev->flags & IFF_UP)))
+               goto err;
+
+       len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
+       if (skb->len > len)
+               goto err;
+
+       skb->dev = dev;
+       return 0;
+err:
+       return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
+
+BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
+{
+       struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+       if (unlikely(flags))
+               return XDP_ABORTED;
+
+       ri->ifindex = ifindex;
+       ri->flags = flags;
+       return XDP_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_xdp_redirect_proto = {
+       .func           = bpf_xdp_redirect,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+       .arg2_type      = ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_pkt_data(void *func)
 {
        if (func == bpf_skb_vlan_push ||
@@ -3011,6 +3172,10 @@ xdp_func_proto(enum bpf_func_id func_id)
                return &bpf_get_smp_processor_id_proto;
        case BPF_FUNC_xdp_adjust_head:
                return &bpf_xdp_adjust_head_proto;
+       case BPF_FUNC_redirect:
+               return &bpf_xdp_redirect_proto;
+       case BPF_FUNC_redirect_map:
+               return &bpf_redirect_map_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -3310,6 +3475,11 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+void bpf_warn_invalid_xdp_redirect(u32 ifindex)
+{
+       WARN_ONCE(1, "Illegal XDP redirect to unsupported device ifindex(%i)\n", ifindex);
+}
+
 static bool __is_valid_sock_ops_access(int off, int size)
 {
        if (off < 0 || off >= sizeof(struct bpf_sock_ops))
diff --git a/net/core/flow.c b/net/core/flow.c
deleted file mode 100644 (file)
index f7f5d19..0000000
+++ /dev/null
@@ -1,516 +0,0 @@
-/* flow.c: Generic flow cache.
- *
- * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/percpu.h>
-#include <linux/bitops.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/mutex.h>
-#include <net/flow.h>
-#include <linux/atomic.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-struct flow_cache_entry {
-       union {
-               struct hlist_node       hlist;
-               struct list_head        gc_list;
-       } u;
-       struct net                      *net;
-       u16                             family;
-       u8                              dir;
-       u32                             genid;
-       struct flowi                    key;
-       struct flow_cache_object        *object;
-};
-
-struct flow_flush_info {
-       struct flow_cache               *cache;
-       atomic_t                        cpuleft;
-       struct completion               completion;
-};
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-#define flow_cache_hash_size(cache)    (1U << (cache)->hash_shift)
-#define FLOW_HASH_RND_PERIOD           (10 * 60 * HZ)
-
-static void flow_cache_new_hashrnd(unsigned long arg)
-{
-       struct flow_cache *fc = (void *) arg;
-       int i;
-
-       for_each_possible_cpu(i)
-               per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
-
-       fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-       add_timer(&fc->rnd_timer);
-}
-
-static int flow_entry_valid(struct flow_cache_entry *fle,
-                               struct netns_xfrm *xfrm)
-{
-       if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
-               return 0;
-       if (fle->object && !fle->object->ops->check(fle->object))
-               return 0;
-       return 1;
-}
-
-static void flow_entry_kill(struct flow_cache_entry *fle,
-                               struct netns_xfrm *xfrm)
-{
-       if (fle->object)
-               fle->object->ops->delete(fle->object);
-       kmem_cache_free(flow_cachep, fle);
-}
-
-static void flow_cache_gc_task(struct work_struct *work)
-{
-       struct list_head gc_list;
-       struct flow_cache_entry *fce, *n;
-       struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-                                               flow_cache_gc_work);
-
-       INIT_LIST_HEAD(&gc_list);
-       spin_lock_bh(&xfrm->flow_cache_gc_lock);
-       list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
-       spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-
-       list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
-               flow_entry_kill(fce, xfrm);
-               atomic_dec(&xfrm->flow_cache_gc_count);
-       }
-}
-
-static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
-                                    unsigned int deleted,
-                                    struct list_head *gc_list,
-                                    struct netns_xfrm *xfrm)
-{
-       if (deleted) {
-               atomic_add(deleted, &xfrm->flow_cache_gc_count);
-               fcp->hash_count -= deleted;
-               spin_lock_bh(&xfrm->flow_cache_gc_lock);
-               list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
-               spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-               schedule_work(&xfrm->flow_cache_gc_work);
-       }
-}
-
-static void __flow_cache_shrink(struct flow_cache *fc,
-                               struct flow_cache_percpu *fcp,
-                               unsigned int shrink_to)
-{
-       struct flow_cache_entry *fle;
-       struct hlist_node *tmp;
-       LIST_HEAD(gc_list);
-       unsigned int deleted = 0;
-       struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-                                               flow_cache_global);
-       unsigned int i;
-
-       for (i = 0; i < flow_cache_hash_size(fc); i++) {
-               unsigned int saved = 0;
-
-               hlist_for_each_entry_safe(fle, tmp,
-                                         &fcp->hash_table[i], u.hlist) {
-                       if (saved < shrink_to &&
-                           flow_entry_valid(fle, xfrm)) {
-                               saved++;
-                       } else {
-                               deleted++;
-                               hlist_del(&fle->u.hlist);
-                               list_add_tail(&fle->u.gc_list, &gc_list);
-                       }
-               }
-       }
-
-       flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-}
-
-static void flow_cache_shrink(struct flow_cache *fc,
-                             struct flow_cache_percpu *fcp)
-{
-       unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
-
-       __flow_cache_shrink(fc, fcp, shrink_to);
-}
-
-static void flow_new_hash_rnd(struct flow_cache *fc,
-                             struct flow_cache_percpu *fcp)
-{
-       get_random_bytes(&fcp->hash_rnd, sizeof(u32));
-       fcp->hash_rnd_recalc = 0;
-       __flow_cache_shrink(fc, fcp, 0);
-}
-
-static u32 flow_hash_code(struct flow_cache *fc,
-                         struct flow_cache_percpu *fcp,
-                         const struct flowi *key,
-                         unsigned int keysize)
-{
-       const u32 *k = (const u32 *) key;
-       const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
-
-       return jhash2(k, length, fcp->hash_rnd)
-               & (flow_cache_hash_size(fc) - 1);
-}
-
-/* I hear what you're saying, use memcmp.  But memcmp cannot make
- * important assumptions that we can here, such as alignment.
- */
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
-                           unsigned int keysize)
-{
-       const flow_compare_t *k1, *k1_lim, *k2;
-
-       k1 = (const flow_compare_t *) key1;
-       k1_lim = k1 + keysize;
-
-       k2 = (const flow_compare_t *) key2;
-
-       do {
-               if (*k1++ != *k2++)
-                       return 1;
-       } while (k1 < k1_lim);
-
-       return 0;
-}
-
-struct flow_cache_object *
-flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
-                 flow_resolve_t resolver, void *ctx)
-{
-       struct flow_cache *fc = &net->xfrm.flow_cache_global;
-       struct flow_cache_percpu *fcp;
-       struct flow_cache_entry *fle, *tfle;
-       struct flow_cache_object *flo;
-       unsigned int keysize;
-       unsigned int hash;
-
-       local_bh_disable();
-       fcp = this_cpu_ptr(fc->percpu);
-
-       fle = NULL;
-       flo = NULL;
-
-       keysize = flow_key_size(family);
-       if (!keysize)
-               goto nocache;
-
-       /* Packet really early in init?  Making flow_cache_init a
-        * pre-smp initcall would solve this.  --RR */
-       if (!fcp->hash_table)
-               goto nocache;
-
-       if (fcp->hash_rnd_recalc)
-               flow_new_hash_rnd(fc, fcp);
-
-       hash = flow_hash_code(fc, fcp, key, keysize);
-       hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
-               if (tfle->net == net &&
-                   tfle->family == family &&
-                   tfle->dir == dir &&
-                   flow_key_compare(key, &tfle->key, keysize) == 0) {
-                       fle = tfle;
-                       break;
-               }
-       }
-
-       if (unlikely(!fle)) {
-               if (fcp->hash_count > fc->high_watermark)
-                       flow_cache_shrink(fc, fcp);
-
-               if (atomic_read(&net->xfrm.flow_cache_gc_count) >
-                   2 * num_online_cpus() * fc->high_watermark) {
-                       flo = ERR_PTR(-ENOBUFS);
-                       goto ret_object;
-               }
-
-               fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
-               if (fle) {
-                       fle->net = net;
-                       fle->family = family;
-                       fle->dir = dir;
-                       memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
-                       fle->object = NULL;
-                       hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
-                       fcp->hash_count++;
-               }
-       } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
-               flo = fle->object;
-               if (!flo)
-                       goto ret_object;
-               flo = flo->ops->get(flo);
-               if (flo)
-                       goto ret_object;
-       } else if (fle->object) {
-               flo = fle->object;
-               flo->ops->delete(flo);
-               fle->object = NULL;
-       }
-
-nocache:
-       flo = NULL;
-       if (fle) {
-               flo = fle->object;
-               fle->object = NULL;
-       }
-       flo = resolver(net, key, family, dir, flo, ctx);
-       if (fle) {
-               fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
-               if (!IS_ERR(flo))
-                       fle->object = flo;
-               else
-                       fle->genid--;
-       } else {
-               if (!IS_ERR_OR_NULL(flo))
-                       flo->ops->delete(flo);
-       }
-ret_object:
-       local_bh_enable();
-       return flo;
-}
-EXPORT_SYMBOL(flow_cache_lookup);
-
-static void flow_cache_flush_tasklet(unsigned long data)
-{
-       struct flow_flush_info *info = (void *)data;
-       struct flow_cache *fc = info->cache;
-       struct flow_cache_percpu *fcp;
-       struct flow_cache_entry *fle;
-       struct hlist_node *tmp;
-       LIST_HEAD(gc_list);
-       unsigned int deleted = 0;
-       struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-                                               flow_cache_global);
-       unsigned int i;
-
-       fcp = this_cpu_ptr(fc->percpu);
-       for (i = 0; i < flow_cache_hash_size(fc); i++) {
-               hlist_for_each_entry_safe(fle, tmp,
-                                         &fcp->hash_table[i], u.hlist) {
-                       if (flow_entry_valid(fle, xfrm))
-                               continue;
-
-                       deleted++;
-                       hlist_del(&fle->u.hlist);
-                       list_add_tail(&fle->u.gc_list, &gc_list);
-               }
-       }
-
-       flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-
-       if (atomic_dec_and_test(&info->cpuleft))
-               complete(&info->completion);
-}
-
-/*
- * Return whether a cpu needs flushing.  Conservatively, we assume
- * the presence of any entries means the core may require flushing,
- * since the flow_cache_ops.check() function may assume it's running
- * on the same core as the per-cpu cache component.
- */
-static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
-{
-       struct flow_cache_percpu *fcp;
-       unsigned int i;
-
-       fcp = per_cpu_ptr(fc->percpu, cpu);
-       for (i = 0; i < flow_cache_hash_size(fc); i++)
-               if (!hlist_empty(&fcp->hash_table[i]))
-                       return 0;
-       return 1;
-}
-
-static void flow_cache_flush_per_cpu(void *data)
-{
-       struct flow_flush_info *info = data;
-       struct tasklet_struct *tasklet;
-
-       tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet;
-       tasklet->data = (unsigned long)info;
-       tasklet_schedule(tasklet);
-}
-
-void flow_cache_flush(struct net *net)
-{
-       struct flow_flush_info info;
-       cpumask_var_t mask;
-       int i, self;
-
-       /* Track which cpus need flushing to avoid disturbing all cores. */
-       if (!alloc_cpumask_var(&mask, GFP_KERNEL))
-               return;
-       cpumask_clear(mask);
-
-       /* Don't want cpus going down or up during this. */
-       get_online_cpus();
-       mutex_lock(&net->xfrm.flow_flush_sem);
-       info.cache = &net->xfrm.flow_cache_global;
-       for_each_online_cpu(i)
-               if (!flow_cache_percpu_empty(info.cache, i))
-                       cpumask_set_cpu(i, mask);
-       atomic_set(&info.cpuleft, cpumask_weight(mask));
-       if (atomic_read(&info.cpuleft) == 0)
-               goto done;
-
-       init_completion(&info.completion);
-
-       local_bh_disable();
-       self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
-       on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
-       if (self)
-               flow_cache_flush_tasklet((unsigned long)&info);
-       local_bh_enable();
-
-       wait_for_completion(&info.completion);
-
-done:
-       mutex_unlock(&net->xfrm.flow_flush_sem);
-       put_online_cpus();
-       free_cpumask_var(mask);
-}
-
-static void flow_cache_flush_task(struct work_struct *work)
-{
-       struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-                                               flow_cache_flush_work);
-       struct net *net = container_of(xfrm, struct net, xfrm);
-
-       flow_cache_flush(net);
-}
-
-void flow_cache_flush_deferred(struct net *net)
-{
-       schedule_work(&net->xfrm.flow_cache_flush_work);
-}
-
-static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
-{
-       struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-       unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
-
-       if (!fcp->hash_table) {
-               fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
-               if (!fcp->hash_table) {
-                       pr_err("NET: failed to allocate flow cache sz %u\n", sz);
-                       return -ENOMEM;
-               }
-               fcp->hash_rnd_recalc = 1;
-               fcp->hash_count = 0;
-               tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
-       }
-       return 0;
-}
-
-static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
-{
-       struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-
-       return flow_cache_cpu_prepare(fc, cpu);
-}
-
-static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
-{
-       struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-       struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-
-       __flow_cache_shrink(fc, fcp, 0);
-       return 0;
-}
-
-int flow_cache_init(struct net *net)
-{
-       int i;
-       struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-       if (!flow_cachep)
-               flow_cachep = kmem_cache_create("flow_cache",
-                                               sizeof(struct flow_cache_entry),
-                                               0, SLAB_PANIC, NULL);
-       spin_lock_init(&net->xfrm.flow_cache_gc_lock);
-       INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
-       INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
-       INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
-       mutex_init(&net->xfrm.flow_flush_sem);
-       atomic_set(&net->xfrm.flow_cache_gc_count, 0);
-
-       fc->hash_shift = 10;
-       fc->low_watermark = 2 * flow_cache_hash_size(fc);
-       fc->high_watermark = 4 * flow_cache_hash_size(fc);
-
-       fc->percpu = alloc_percpu(struct flow_cache_percpu);
-       if (!fc->percpu)
-               return -ENOMEM;
-
-       if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
-               goto err;
-
-       setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
-                   (unsigned long) fc);
-       fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-       add_timer(&fc->rnd_timer);
-
-       return 0;
-
-err:
-       for_each_possible_cpu(i) {
-               struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-               kfree(fcp->hash_table);
-               fcp->hash_table = NULL;
-       }
-
-       free_percpu(fc->percpu);
-       fc->percpu = NULL;
-
-       return -ENOMEM;
-}
-EXPORT_SYMBOL(flow_cache_init);
-
-void flow_cache_fini(struct net *net)
-{
-       int i;
-       struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-       del_timer_sync(&fc->rnd_timer);
-
-       cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
-
-       for_each_possible_cpu(i) {
-               struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-               kfree(fcp->hash_table);
-               fcp->hash_table = NULL;
-       }
-
-       free_percpu(fc->percpu);
-       fc->percpu = NULL;
-}
-EXPORT_SYMBOL(flow_cache_fini);
-
-void __init flow_cache_hp_init(void)
-{
-       int ret;
-
-       ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
-                                     "net/flow:prepare",
-                                     flow_cache_cpu_up_prep,
-                                     flow_cache_cpu_dead);
-       WARN_ON(ret < 0);
-}
index f990eb8..84bdfa2 100644 (file)
@@ -158,31 +158,6 @@ out:
  *
  */
 
-struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
-{
-       struct sk_buff *skb;
-
-       /* Get the HEAD */
-       skb = kmem_cache_alloc_node(skbuff_head_cache,
-                                   gfp_mask & ~__GFP_DMA, node);
-       if (!skb)
-               goto out;
-
-       /*
-        * Only clear those fields we need to clear, not those that we will
-        * actually initialise below. Hence, don't put any more fields after
-        * the tail pointer in struct sk_buff!
-        */
-       memset(skb, 0, offsetof(struct sk_buff, tail));
-       skb->head = NULL;
-       skb->truesize = sizeof(struct sk_buff);
-       refcount_set(&skb->users, 1);
-
-       skb->mac_header = (typeof(skb->mac_header))~0U;
-out:
-       return skb;
-}
-
 /**
  *     __alloc_skb     -       allocate a network buffer
  *     @size: size to allocate
@@ -663,8 +638,7 @@ void skb_release_head_state(struct sk_buff *skb)
 static void skb_release_all(struct sk_buff *skb)
 {
        skb_release_head_state(skb);
-       if (likely(skb->head))
-               skb_release_data(skb);
+       skb_release_data(skb);
 }
 
 /**
@@ -762,8 +736,7 @@ void consume_stateless_skb(struct sk_buff *skb)
                return;
 
        trace_consume_skb(skb);
-       if (likely(skb->head))
-               skb_release_data(skb);
+       skb_release_data(skb);
        kfree_skbmem(skb);
 }
 
@@ -1719,6 +1692,8 @@ pull_pages:
                        if (eat) {
                                skb_shinfo(skb)->frags[k].page_offset += eat;
                                skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+                               if (!i)
+                                       goto end;
                                eat = 0;
                        }
                        k++;
@@ -1726,6 +1701,7 @@ pull_pages:
        }
        skb_shinfo(skb)->nr_frags = k;
 
+end:
        skb->tail     += delta;
        skb->data_len -= delta;
 
index 416ac4e..a55e2e4 100644 (file)
@@ -220,6 +220,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 }
 
 #ifdef CONFIG_PM_SLEEP
+static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
+{
+       return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
+}
+
 int dsa_switch_suspend(struct dsa_switch *ds)
 {
        int i, ret = 0;
index 76c2077..5ce44fb 100644 (file)
@@ -1219,10 +1219,9 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
                                 netdev_features_t features)
 {
-       bool udpfrag = false, fixedid = false, gso_partial, encap;
+       bool fixedid = false, gso_partial, encap;
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        const struct net_offload *ops;
-       unsigned int offset = 0;
        struct iphdr *iph;
        int proto, tot_len;
        int nhoff;
@@ -1257,7 +1256,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
        segs = ERR_PTR(-EPROTONOSUPPORT);
 
        if (!skb->encapsulation || encap) {
-               udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
                fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
 
                /* fixed ID is invalid if DF bit is not set */
@@ -1277,13 +1275,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
        skb = segs;
        do {
                iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
-               if (udpfrag) {
-                       iph->frag_off = htons(offset >> 3);
-                       if (skb->next)
-                               iph->frag_off |= htons(IP_MF);
-                       offset += skb->len - nhoff - ihl;
-                       tot_len = skb->len - nhoff;
-               } else if (skb_is_gso(skb)) {
+               if (skb_is_gso(skb)) {
                        if (!fixedid) {
                                iph->id = htons(id);
                                id += skb_shinfo(skb)->gso_segs;
index d5cac99..416bb30 100644 (file)
@@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
        __be16 protocol = skb->protocol;
        u16 mac_len = skb->mac_len;
        int gre_offset, outer_hlen;
-       bool need_csum, ufo, gso_partial;
+       bool need_csum, gso_partial;
 
        if (!skb->encapsulation)
                goto out;
@@ -47,20 +47,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
        need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
        skb->encap_hdr_csum = need_csum;
 
-       ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
        features &= skb->dev->hw_enc_features;
 
-       /* The only checksum offload we care about from here on out is the
-        * outer one so strip the existing checksum feature flags based
-        * on the fact that we will be computing our checksum in software.
-        */
-       if (ufo) {
-               features &= ~NETIF_F_CSUM_MASK;
-               if (!need_csum)
-                       features |= NETIF_F_HW_CSUM;
-       }
-
        /* segment inner packet. */
        segs = skb_mac_gso_segment(skb, features);
        if (IS_ERR_OR_NULL(segs)) {
index c5a117c..337ad41 100644 (file)
@@ -33,7 +33,7 @@
  *  also be removed if the pool is overloaded i.e. if the total amount of
  *  entries is greater-or-equal than the threshold.
  *
- *  Node pool is organised as an AVL tree.
+ *  Node pool is organised as an RB tree.
  *  Such an implementation has been chosen not just for fun.  It's a way to
  *  prevent easy and efficient DoS attacks by creating hash collisions.  A huge
  *  amount of long living nodes in a single hash slot would significantly delay
@@ -45,7 +45,7 @@
  *      AND reference count being 0.
  *  3.  Global variable peer_total is modified under the pool lock.
  *  4.  struct inet_peer fields modification:
- *             avl_left, avl_right, avl_parent, avl_height: pool lock
+ *             rb_node: pool lock
  *             refcnt: atomically against modifications on other CPU;
  *                usually under some other lock to prevent node disappearing
  *             daddr: unchangeable
 
 static struct kmem_cache *peer_cachep __read_mostly;
 
-static LIST_HEAD(gc_list);
-static const int gc_delay = 60 * HZ;
-static struct delayed_work gc_work;
-static DEFINE_SPINLOCK(gc_lock);
-
-#define node_height(x) x->avl_height
-
-#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
-#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
-static const struct inet_peer peer_fake_node = {
-       .avl_left       = peer_avl_empty_rcu,
-       .avl_right      = peer_avl_empty_rcu,
-       .avl_height     = 0
-};
-
 void inet_peer_base_init(struct inet_peer_base *bp)
 {
-       bp->root = peer_avl_empty_rcu;
+       bp->rb_root = RB_ROOT;
        seqlock_init(&bp->lock);
        bp->total = 0;
 }
 EXPORT_SYMBOL_GPL(inet_peer_base_init);
 
-#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
+#define PEER_MAX_GC 32
 
 /* Exported for sysctl_net_ipv4.  */
 int inet_peer_threshold __read_mostly = 65536 + 128;   /* start to throw entries more
@@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m
 int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;     /* usual time to live: 10 min */
 
-static void inetpeer_gc_worker(struct work_struct *work)
-{
-       struct inet_peer *p, *n, *c;
-       struct list_head list;
-
-       spin_lock_bh(&gc_lock);
-       list_replace_init(&gc_list, &list);
-       spin_unlock_bh(&gc_lock);
-
-       if (list_empty(&list))
-               return;
-
-       list_for_each_entry_safe(p, n, &list, gc_list) {
-
-               if (need_resched())
-                       cond_resched();
-
-               c = rcu_dereference_protected(p->avl_left, 1);
-               if (c != peer_avl_empty) {
-                       list_add_tail(&c->gc_list, &list);
-                       p->avl_left = peer_avl_empty_rcu;
-               }
-
-               c = rcu_dereference_protected(p->avl_right, 1);
-               if (c != peer_avl_empty) {
-                       list_add_tail(&c->gc_list, &list);
-                       p->avl_right = peer_avl_empty_rcu;
-               }
-
-               n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
-
-               if (refcount_read(&p->refcnt) == 1) {
-                       list_del(&p->gc_list);
-                       kmem_cache_free(peer_cachep, p);
-               }
-       }
-
-       if (list_empty(&list))
-               return;
-
-       spin_lock_bh(&gc_lock);
-       list_splice(&list, &gc_list);
-       spin_unlock_bh(&gc_lock);
-
-       schedule_delayed_work(&gc_work, gc_delay);
-}
-
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
 {
@@ -153,225 +91,62 @@ void __init inet_initpeers(void)
                        sizeof(struct inet_peer),
                        0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                        NULL);
-
-       INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
 }
 
-#define rcu_deref_locked(X, BASE)                              \
-       rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
-
-/*
- * Called with local BH disabled and the pool lock held.
- */
-#define lookup(_daddr, _stack, _base)                          \
-({                                                             \
-       struct inet_peer *u;                                    \
-       struct inet_peer __rcu **v;                             \
-                                                               \
-       stackptr = _stack;                                      \
-       *stackptr++ = &_base->root;                             \
-       for (u = rcu_deref_locked(_base->root, _base);          \
-            u != peer_avl_empty;) {                            \
-               int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \
-               if (cmp == 0)                                   \
-                       break;                                  \
-               if (cmp == -1)                                  \
-                       v = &u->avl_left;                       \
-               else                                            \
-                       v = &u->avl_right;                      \
-               *stackptr++ = v;                                \
-               u = rcu_deref_locked(*v, _base);                \
-       }                                                       \
-       u;                                                      \
-})
-
-/*
- * Called with rcu_read_lock()
- * Because we hold no lock against a writer, its quite possible we fall
- * in an endless loop.
- * But every pointer we follow is guaranteed to be valid thanks to RCU.
- * We exit from this function if number of links exceeds PEER_MAXDEPTH
- */
-static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
-                                   struct inet_peer_base *base)
+/* Called with rcu_read_lock() or base->lock held */
+static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
+                               struct inet_peer_base *base,
+                               unsigned int seq,
+                               struct inet_peer *gc_stack[],
+                               unsigned int *gc_cnt,
+                               struct rb_node **parent_p,
+                               struct rb_node ***pp_p)
 {
-       struct inet_peer *u = rcu_dereference(base->root);
-       int count = 0;
+       struct rb_node **pp, *parent;
+       struct inet_peer *p;
+
+       pp = &base->rb_root.rb_node;
+       parent = NULL;
+       while (*pp) {
+               int cmp;
 
-       while (u != peer_avl_empty) {
-               int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
+               parent = rcu_dereference_raw(*pp);
+               p = rb_entry(parent, struct inet_peer, rb_node);
+               cmp = inetpeer_addr_cmp(daddr, &p->daddr);
                if (cmp == 0) {
-                       /* Before taking a reference, check if this entry was
-                        * deleted (refcnt=0)
-                        */
-                       if (!refcount_inc_not_zero(&u->refcnt)) {
-                               u = NULL;
-                       }
-                       return u;
+                       if (!refcount_inc_not_zero(&p->refcnt))
+                               break;
+                       return p;
+               }
+               if (gc_stack) {
+                       if (*gc_cnt < PEER_MAX_GC)
+                               gc_stack[(*gc_cnt)++] = p;
+               } else if (unlikely(read_seqretry(&base->lock, seq))) {
+                       break;
                }
                if (cmp == -1)
-                       u = rcu_dereference(u->avl_left);
+                       pp = &(*pp)->rb_left;
                else
-                       u = rcu_dereference(u->avl_right);
-               if (unlikely(++count == PEER_MAXDEPTH))
-                       break;
+                       pp = &(*pp)->rb_right;
        }
+       *parent_p = parent;
+       *pp_p = pp;
        return NULL;
 }
 
-/* Called with local BH disabled and the pool lock held. */
-#define lookup_rightempty(start, base)                         \
-({                                                             \
-       struct inet_peer *u;                                    \
-       struct inet_peer __rcu **v;                             \
-       *stackptr++ = &start->avl_left;                         \
-       v = &start->avl_left;                                   \
-       for (u = rcu_deref_locked(*v, base);                    \
-            u->avl_right != peer_avl_empty_rcu;) {             \
-               v = &u->avl_right;                              \
-               *stackptr++ = v;                                \
-               u = rcu_deref_locked(*v, base);                 \
-       }                                                       \
-       u;                                                      \
-})
-
-/* Called with local BH disabled and the pool lock held.
- * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.
- */
-static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
-                              struct inet_peer __rcu ***stackend,
-                              struct inet_peer_base *base)
-{
-       struct inet_peer __rcu **nodep;
-       struct inet_peer *node, *l, *r;
-       int lh, rh;
-
-       while (stackend > stack) {
-               nodep = *--stackend;
-               node = rcu_deref_locked(*nodep, base);
-               l = rcu_deref_locked(node->avl_left, base);
-               r = rcu_deref_locked(node->avl_right, base);
-               lh = node_height(l);
-               rh = node_height(r);
-               if (lh > rh + 1) { /* l: RH+2 */
-                       struct inet_peer *ll, *lr, *lrl, *lrr;
-                       int lrh;
-                       ll = rcu_deref_locked(l->avl_left, base);
-                       lr = rcu_deref_locked(l->avl_right, base);
-                       lrh = node_height(lr);
-                       if (lrh <= node_height(ll)) {   /* ll: RH+1 */
-                               RCU_INIT_POINTER(node->avl_left, lr);   /* lr: RH or RH+1 */
-                               RCU_INIT_POINTER(node->avl_right, r);   /* r: RH */
-                               node->avl_height = lrh + 1; /* RH+1 or RH+2 */
-                               RCU_INIT_POINTER(l->avl_left, ll);       /* ll: RH+1 */
-                               RCU_INIT_POINTER(l->avl_right, node);   /* node: RH+1 or RH+2 */
-                               l->avl_height = node->avl_height + 1;
-                               RCU_INIT_POINTER(*nodep, l);
-                       } else { /* ll: RH, lr: RH+1 */
-                               lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
-                               lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
-                               RCU_INIT_POINTER(node->avl_left, lrr);  /* lrr: RH or RH-1 */
-                               RCU_INIT_POINTER(node->avl_right, r);   /* r: RH */
-                               node->avl_height = rh + 1; /* node: RH+1 */
-                               RCU_INIT_POINTER(l->avl_left, ll);      /* ll: RH */
-                               RCU_INIT_POINTER(l->avl_right, lrl);    /* lrl: RH or RH-1 */
-                               l->avl_height = rh + 1; /* l: RH+1 */
-                               RCU_INIT_POINTER(lr->avl_left, l);      /* l: RH+1 */
-                               RCU_INIT_POINTER(lr->avl_right, node);  /* node: RH+1 */
-                               lr->avl_height = rh + 2;
-                               RCU_INIT_POINTER(*nodep, lr);
-                       }
-               } else if (rh > lh + 1) { /* r: LH+2 */
-                       struct inet_peer *rr, *rl, *rlr, *rll;
-                       int rlh;
-                       rr = rcu_deref_locked(r->avl_right, base);
-                       rl = rcu_deref_locked(r->avl_left, base);
-                       rlh = node_height(rl);
-                       if (rlh <= node_height(rr)) {   /* rr: LH+1 */
-                               RCU_INIT_POINTER(node->avl_right, rl);  /* rl: LH or LH+1 */
-                               RCU_INIT_POINTER(node->avl_left, l);    /* l: LH */
-                               node->avl_height = rlh + 1; /* LH+1 or LH+2 */
-                               RCU_INIT_POINTER(r->avl_right, rr);     /* rr: LH+1 */
-                               RCU_INIT_POINTER(r->avl_left, node);    /* node: LH+1 or LH+2 */
-                               r->avl_height = node->avl_height + 1;
-                               RCU_INIT_POINTER(*nodep, r);
-                       } else { /* rr: RH, rl: RH+1 */
-                               rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
-                               rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
-                               RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
-                               RCU_INIT_POINTER(node->avl_left, l);    /* l: LH */
-                               node->avl_height = lh + 1; /* node: LH+1 */
-                               RCU_INIT_POINTER(r->avl_right, rr);     /* rr: LH */
-                               RCU_INIT_POINTER(r->avl_left, rlr);     /* rlr: LH or LH-1 */
-                               r->avl_height = lh + 1; /* r: LH+1 */
-                               RCU_INIT_POINTER(rl->avl_right, r);     /* r: LH+1 */
-                               RCU_INIT_POINTER(rl->avl_left, node);   /* node: LH+1 */
-                               rl->avl_height = lh + 2;
-                               RCU_INIT_POINTER(*nodep, rl);
-                       }
-               } else {
-                       node->avl_height = (lh > rh ? lh : rh) + 1;
-               }
-       }
-}
-
-/* Called with local BH disabled and the pool lock held. */
-#define link_to_pool(n, base)                                  \
-do {                                                           \
-       n->avl_height = 1;                                      \
-       n->avl_left = peer_avl_empty_rcu;                       \
-       n->avl_right = peer_avl_empty_rcu;                      \
-       /* lockless readers can catch us now */                 \
-       rcu_assign_pointer(**--stackptr, n);                    \
-       peer_avl_rebalance(stack, stackptr, base);              \
-} while (0)
-
 static void inetpeer_free_rcu(struct rcu_head *head)
 {
        kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
 }
 
-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
-                            struct inet_peer __rcu **stack[PEER_MAXDEPTH])
-{
-       struct inet_peer __rcu ***stackptr, ***delp;
-
-       if (lookup(&p->daddr, stack, base) != p)
-               BUG();
-       delp = stackptr - 1; /* *delp[0] == p */
-       if (p->avl_left == peer_avl_empty_rcu) {
-               *delp[0] = p->avl_right;
-               --stackptr;
-       } else {
-               /* look for a node to insert instead of p */
-               struct inet_peer *t;
-               t = lookup_rightempty(p, base);
-               BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
-               **--stackptr = t->avl_left;
-               /* t is removed, t->daddr > x->daddr for any
-                * x in p->avl_left subtree.
-                * Put t in the old place of p. */
-               RCU_INIT_POINTER(*delp[0], t);
-               t->avl_left = p->avl_left;
-               t->avl_right = p->avl_right;
-               t->avl_height = p->avl_height;
-               BUG_ON(delp[1] != &p->avl_left);
-               delp[1] = &t->avl_left; /* was &p->avl_left */
-       }
-       peer_avl_rebalance(stack, stackptr, base);
-       base->total--;
-       call_rcu(&p->rcu, inetpeer_free_rcu);
-}
-
 /* perform garbage collect on all items stacked during a lookup */
-static int inet_peer_gc(struct inet_peer_base *base,
-                       struct inet_peer __rcu **stack[PEER_MAXDEPTH],
-                       struct inet_peer __rcu ***stackptr)
+static void inet_peer_gc(struct inet_peer_base *base,
+                        struct inet_peer *gc_stack[],
+                        unsigned int gc_cnt)
 {
-       struct inet_peer *p, *gchead = NULL;
+       struct inet_peer *p;
        __u32 delta, ttl;
-       int cnt = 0;
+       int i;
 
        if (base->total >= inet_peer_threshold)
                ttl = 0; /* be aggressive */
@@ -379,43 +154,38 @@ static int inet_peer_gc(struct inet_peer_base *base,
                ttl = inet_peer_maxttl
                                - (inet_peer_maxttl - inet_peer_minttl) / HZ *
                                        base->total / inet_peer_threshold * HZ;
-       stackptr--; /* last stack slot is peer_avl_empty */
-       while (stackptr > stack) {
-               stackptr--;
-               p = rcu_deref_locked(**stackptr, base);
-               if (refcount_read(&p->refcnt) == 1) {
-                       smp_rmb();
-                       delta = (__u32)jiffies - p->dtime;
-                       if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) {
-                               p->gc_next = gchead;
-                               gchead = p;
-                       }
-               }
+       for (i = 0; i < gc_cnt; i++) {
+               p = gc_stack[i];
+               delta = (__u32)jiffies - p->dtime;
+               if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
+                       gc_stack[i] = NULL;
        }
-       while ((p = gchead) != NULL) {
-               gchead = p->gc_next;
-               cnt++;
-               unlink_from_pool(p, base, stack);
+       for (i = 0; i < gc_cnt; i++) {
+               p = gc_stack[i];
+               if (p) {
+                       rb_erase(&p->rb_node, &base->rb_root);
+                       base->total--;
+                       call_rcu(&p->rcu, inetpeer_free_rcu);
+               }
        }
-       return cnt;
 }
 
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
                               const struct inetpeer_addr *daddr,
                               int create)
 {
-       struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-       struct inet_peer *p;
-       unsigned int sequence;
-       int invalidated, gccnt = 0;
+       struct inet_peer *p, *gc_stack[PEER_MAX_GC];
+       struct rb_node **pp, *parent;
+       unsigned int gc_cnt, seq;
+       int invalidated;
 
        /* Attempt a lockless lookup first.
         * Because of a concurrent writer, we might not find an existing entry.
         */
        rcu_read_lock();
-       sequence = read_seqbegin(&base->lock);
-       p = lookup_rcu(daddr, base);
-       invalidated = read_seqretry(&base->lock, sequence);
+       seq = read_seqbegin(&base->lock);
+       p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
+       invalidated = read_seqretry(&base->lock, seq);
        rcu_read_unlock();
 
        if (p)
@@ -428,36 +198,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
        /* retry an exact lookup, taking the lock before.
         * At least, nodes should be hot in our cache.
         */
+       parent = NULL;
        write_seqlock_bh(&base->lock);
-relookup:
-       p = lookup(daddr, stack, base);
-       if (p != peer_avl_empty) {
-               refcount_inc(&p->refcnt);
-               write_sequnlock_bh(&base->lock);
-               return p;
-       }
-       if (!gccnt) {
-               gccnt = inet_peer_gc(base, stack, stackptr);
-               if (gccnt && create)
-                       goto relookup;
-       }
-       p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
-       if (p) {
-               p->daddr = *daddr;
-               refcount_set(&p->refcnt, 2);
-               atomic_set(&p->rid, 0);
-               p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
-               p->rate_tokens = 0;
-               /* 60*HZ is arbitrary, but chosen enough high so that the first
-                * calculation of tokens is at its maximum.
-                */
-               p->rate_last = jiffies - 60*HZ;
-               INIT_LIST_HEAD(&p->gc_list);
 
-               /* Link the node. */
-               link_to_pool(p, base);
-               base->total++;
+       gc_cnt = 0;
+       p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
+       if (!p && create) {
+               p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
+               if (p) {
+                       p->daddr = *daddr;
+                       refcount_set(&p->refcnt, 2);
+                       atomic_set(&p->rid, 0);
+                       p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+                       p->rate_tokens = 0;
+                       /* 60*HZ is arbitrary, but chosen enough high so that the first
+                        * calculation of tokens is at its maximum.
+                        */
+                       p->rate_last = jiffies - 60*HZ;
+
+                       rb_link_node(&p->rb_node, parent, pp);
+                       rb_insert_color(&p->rb_node, &base->rb_root);
+                       base->total++;
+               }
        }
+       if (gc_cnt)
+               inet_peer_gc(base, gc_stack, gc_cnt);
        write_sequnlock_bh(&base->lock);
 
        return p;
@@ -467,8 +232,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
 void inet_putpeer(struct inet_peer *p)
 {
        p->dtime = (__u32)jiffies;
-       smp_mb__before_atomic();
-       refcount_dec(&p->refcnt);
+
+       if (refcount_dec_and_test(&p->refcnt))
+               call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
 
@@ -513,30 +279,16 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
 
-static void inetpeer_inval_rcu(struct rcu_head *head)
-{
-       struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
-
-       spin_lock_bh(&gc_lock);
-       list_add_tail(&p->gc_list, &gc_list);
-       spin_unlock_bh(&gc_lock);
-
-       schedule_delayed_work(&gc_work, gc_delay);
-}
-
 void inetpeer_invalidate_tree(struct inet_peer_base *base)
 {
-       struct inet_peer *root;
-
-       write_seqlock_bh(&base->lock);
+       struct inet_peer *p, *n;
 
-       root = rcu_deref_locked(base->root, base);
-       if (root != peer_avl_empty) {
-               base->root = peer_avl_empty_rcu;
-               base->total = 0;
-               call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
+       rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) {
+               inet_putpeer(p);
+               cond_resched();
        }
 
-       write_sequnlock_bh(&base->lock);
+       base->rb_root = RB_ROOT;
+       base->total = 0;
 }
 EXPORT_SYMBOL(inetpeer_invalidate_tree);
index 50c74cd..b631ec6 100644 (file)
@@ -853,61 +853,6 @@ csum_page(struct page *page, int offset, int copy)
        return csum;
 }
 
-static inline int ip_ufo_append_data(struct sock *sk,
-                       struct sk_buff_head *queue,
-                       int getfrag(void *from, char *to, int offset, int len,
-                              int odd, struct sk_buff *skb),
-                       void *from, int length, int hh_len, int fragheaderlen,
-                       int transhdrlen, int maxfraglen, unsigned int flags)
-{
-       struct sk_buff *skb;
-       int err;
-
-       /* There is support for UDP fragmentation offload by network
-        * device, so create one single skb packet containing complete
-        * udp datagram
-        */
-       skb = skb_peek_tail(queue);
-       if (!skb) {
-               skb = sock_alloc_send_skb(sk,
-                       hh_len + fragheaderlen + transhdrlen + 20,
-                       (flags & MSG_DONTWAIT), &err);
-
-               if (!skb)
-                       return err;
-
-               /* reserve space for Hardware header */
-               skb_reserve(skb, hh_len);
-
-               /* create space for UDP/IP header */
-               skb_put(skb, fragheaderlen + transhdrlen);
-
-               /* initialize network header pointer */
-               skb_reset_network_header(skb);
-
-               /* initialize protocol header pointer */
-               skb->transport_header = skb->network_header + fragheaderlen;
-
-               skb->csum = 0;
-
-               if (flags & MSG_CONFIRM)
-                       skb_set_dst_pending_confirm(skb, 1);
-
-               __skb_queue_tail(queue, skb);
-       } else if (skb_is_gso(skb)) {
-               goto append;
-       }
-
-       skb->ip_summed = CHECKSUM_PARTIAL;
-       /* specify the length of each IP datagram fragment */
-       skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
-       skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-
-append:
-       return skb_append_datato_frags(sk, skb, getfrag, from,
-                                      (length - transhdrlen));
-}
-
 static int __ip_append_data(struct sock *sk,
                            struct flowi4 *fl4,
                            struct sk_buff_head *queue,
@@ -965,18 +910,6 @@ static int __ip_append_data(struct sock *sk,
                csummode = CHECKSUM_PARTIAL;
 
        cork->length += length;
-       if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
-            (skb && skb_is_gso(skb))) &&
-           (sk->sk_protocol == IPPROTO_UDP) &&
-           (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-           (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
-               err = ip_ufo_append_data(sk, queue, getfrag, from, length,
-                                        hh_len, fragheaderlen, transhdrlen,
-                                        maxfraglen, flags);
-               if (err)
-                       goto error;
-               return 0;
-       }
 
        /* So, what's going on in the loop below?
         *
@@ -1287,15 +1220,6 @@ ssize_t  ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
        if (!skb)
                return -EINVAL;
 
-       if ((size + skb->len > mtu) &&
-           (sk->sk_protocol == IPPROTO_UDP) &&
-           (rt->dst.dev->features & NETIF_F_UFO)) {
-               if (skb->ip_summed != CHECKSUM_PARTIAL)
-                       return -EOPNOTSUPP;
-
-               skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
-               skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-       }
        cork->length += size;
 
        while (size > 0) {
index 0192c25..5ed63d2 100644 (file)
@@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
        .get_link_net   = ip_tunnel_get_link_net,
 };
 
-static bool is_vti_tunnel(const struct net_device *dev)
-{
-       return dev->netdev_ops == &vti_netdev_ops;
-}
-
-static int vti_device_event(struct notifier_block *unused,
-                           unsigned long event, void *ptr)
-{
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-       struct ip_tunnel *tunnel = netdev_priv(dev);
-
-       if (!is_vti_tunnel(dev))
-               return NOTIFY_DONE;
-
-       switch (event) {
-       case NETDEV_DOWN:
-               if (!net_eq(tunnel->net, dev_net(dev)))
-                       xfrm_garbage_collect(tunnel->net);
-               break;
-       }
-       return NOTIFY_DONE;
-}
-
-static struct notifier_block vti_notifier_block __read_mostly = {
-       .notifier_call = vti_device_event,
-};
-
 static int __init vti_init(void)
 {
        const char *msg;
@@ -618,8 +591,6 @@ static int __init vti_init(void)
 
        pr_info("IPv4 over IPsec tunneling driver\n");
 
-       register_netdevice_notifier(&vti_notifier_block);
-
        msg = "tunnel device";
        err = register_pernet_device(&vti_net_ops);
        if (err < 0)
@@ -652,7 +623,6 @@ xfrm_proto_ah_failed:
 xfrm_proto_esp_failed:
        unregister_pernet_device(&vti_net_ops);
 pernet_dev_failed:
-       unregister_netdevice_notifier(&vti_notifier_block);
        pr_err("vti init: failed to register %s\n", msg);
        return err;
 }
@@ -664,7 +634,6 @@ static void __exit vti_fini(void)
        xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
        xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
        unregister_pernet_device(&vti_net_ops);
-       unregister_netdevice_notifier(&vti_notifier_block);
 }
 
 module_init(vti_init);
index 4e985de..886d874 100644 (file)
@@ -2377,7 +2377,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        u32 timeout, tlp_time_stamp, rto_time_stamp;
-       u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
        /* No consecutive loss probes. */
        if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
@@ -2406,15 +2405,19 @@ bool tcp_schedule_loss_probe(struct sock *sk)
             tcp_send_head(sk))
                return false;
 
-       /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+       /* Probe timeout is 2*rtt. Add minimum RTO to account
         * for delayed ack when there's one outstanding packet. If no RTT
         * sample is available then probe after TCP_TIMEOUT_INIT.
         */
-       timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
-       if (tp->packets_out == 1)
-               timeout = max_t(u32, timeout,
-                               (rtt + (rtt >> 1) + TCP_DELACK_MAX));
-       timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+       if (tp->srtt_us) {
+               timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+               if (tp->packets_out == 1)
+                       timeout += TCP_RTO_MIN;
+               else
+                       timeout += TCP_TIMEOUT_MIN;
+       } else {
+               timeout = TCP_TIMEOUT_INIT;
+       }
 
        /* If RTO is shorter, just schedule TLP in its place. */
        tlp_time_stamp = tcp_jiffies32 + timeout;
index fe9a493..449cd91 100644 (file)
@@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk)
        tp->rack.advanced = 0;
        tcp_rack_detect_loss(sk, &timeout);
        if (timeout) {
-               timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
+               timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
                                          timeout, inet_csk(sk)->icsk_rto);
        }
index 7812501..97658bf 100644 (file)
@@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
        __be16 new_protocol, bool is_ipv6)
 {
        int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
-       bool remcsum, need_csum, offload_csum, ufo, gso_partial;
+       bool remcsum, need_csum, offload_csum, gso_partial;
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        struct udphdr *uh = udp_hdr(skb);
        u16 mac_offset = skb->mac_header;
@@ -61,8 +61,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
        remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
        skb->remcsum_offload = remcsum;
 
-       ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
        need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
        /* Try to offload checksum if possible */
        offload_csum = !!(need_csum &&
@@ -77,7 +75,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
         * outer one so strip the existing checksum feature flags and
         * instead set the flag based on our outer checksum offload value.
         */
-       if (remcsum || ufo) {
+       if (remcsum) {
                features &= ~NETIF_F_CSUM_MASK;
                if (!need_csum || offload_csum)
                        features |= NETIF_F_HW_CSUM;
@@ -189,66 +187,16 @@ out_unlock:
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
-static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
-                                        netdev_features_t features)
+static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb,
+                                          netdev_features_t features)
 {
        struct sk_buff *segs = ERR_PTR(-EINVAL);
-       unsigned int mss;
-       __wsum csum;
-       struct udphdr *uh;
-       struct iphdr *iph;
 
        if (skb->encapsulation &&
            (skb_shinfo(skb)->gso_type &
-            (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
+            (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)))
                segs = skb_udp_tunnel_segment(skb, features, false);
-               goto out;
-       }
 
-       if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-               goto out;
-
-       mss = skb_shinfo(skb)->gso_size;
-       if (unlikely(skb->len <= mss))
-               goto out;
-
-       if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-               /* Packet is from an untrusted source, reset gso_segs. */
-
-               skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
-               segs = NULL;
-               goto out;
-       }
-
-       /* Do software UFO. Complete and fill in the UDP checksum as
-        * HW cannot do checksum of UDP packets sent as multiple
-        * IP fragments.
-        */
-
-       uh = udp_hdr(skb);
-       iph = ip_hdr(skb);
-
-       uh->check = 0;
-       csum = skb_checksum(skb, 0, skb->len, 0);
-       uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
-       if (uh->check == 0)
-               uh->check = CSUM_MANGLED_0;
-
-       skb->ip_summed = CHECKSUM_NONE;
-
-       /* If there is no outer header we can fake a checksum offload
-        * due to the fact that we have already done the checksum in
-        * software prior to segmenting the frame.
-        */
-       if (!skb->encap_hdr_csum)
-               features |= NETIF_F_HW_CSUM;
-
-       /* Fragment the skb. IP headers of the fragments are updated in
-        * inet_gso_segment()
-        */
-       segs = skb_segment(skb, features);
-out:
        return segs;
 }
 
@@ -382,7 +330,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 
 static const struct net_offload udpv4_offload = {
        .callbacks = {
-               .gso_segment = udp4_ufo_fragment,
+               .gso_segment = udp4_tunnel_segment,
                .gro_receive  = udp4_gro_receive,
                .gro_complete = udp4_gro_complete,
        },
index 71b4ecc..4aefb14 100644 (file)
@@ -213,14 +213,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
        fl4->flowi4_tos = iph->tos;
 }
 
-static inline int xfrm4_garbage_collect(struct dst_ops *ops)
-{
-       struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
-
-       xfrm_garbage_collect_deferred(net);
-       return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
-}
-
 static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
                              struct sk_buff *skb, u32 mtu)
 {
@@ -259,14 +251,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm4_dst_ops_template = {
        .family =               AF_INET,
-       .gc =                   xfrm4_garbage_collect,
        .update_pmtu =          xfrm4_update_pmtu,
        .redirect =             xfrm4_redirect,
        .cow_metrics =          dst_cow_metrics_generic,
        .destroy =              xfrm4_dst_destroy,
        .ifdown =               xfrm4_dst_ifdown,
        .local_out =            __ip_local_out,
-       .gc_thresh =            INT_MAX,
+       .gc_thresh =            32768,
 };
 
 static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
index 1422d6c..c6ec064 100644 (file)
@@ -1114,69 +1114,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 }
 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
 
-static inline int ip6_ufo_append_data(struct sock *sk,
-                       struct sk_buff_head *queue,
-                       int getfrag(void *from, char *to, int offset, int len,
-                       int odd, struct sk_buff *skb),
-                       void *from, int length, int hh_len, int fragheaderlen,
-                       int exthdrlen, int transhdrlen, int mtu,
-                       unsigned int flags, const struct flowi6 *fl6)
-
-{
-       struct sk_buff *skb;
-       int err;
-
-       /* There is support for UDP large send offload by network
-        * device, so create one single skb packet containing complete
-        * udp datagram
-        */
-       skb = skb_peek_tail(queue);
-       if (!skb) {
-               skb = sock_alloc_send_skb(sk,
-                       hh_len + fragheaderlen + transhdrlen + 20,
-                       (flags & MSG_DONTWAIT), &err);
-               if (!skb)
-                       return err;
-
-               /* reserve space for Hardware header */
-               skb_reserve(skb, hh_len);
-
-               /* create space for UDP/IP header */
-               skb_put(skb, fragheaderlen + transhdrlen);
-
-               /* initialize network header pointer */
-               skb_set_network_header(skb, exthdrlen);
-
-               /* initialize protocol header pointer */
-               skb->transport_header = skb->network_header + fragheaderlen;
-
-               skb->protocol = htons(ETH_P_IPV6);
-               skb->csum = 0;
-
-               if (flags & MSG_CONFIRM)
-                       skb_set_dst_pending_confirm(skb, 1);
-
-               __skb_queue_tail(queue, skb);
-       } else if (skb_is_gso(skb)) {
-               goto append;
-       }
-
-       skb->ip_summed = CHECKSUM_PARTIAL;
-       /* Specify the length of each IPv6 datagram fragment.
-        * It has to be a multiple of 8.
-        */
-       skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
-                                    sizeof(struct frag_hdr)) & ~7;
-       skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-       skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
-                                                        &fl6->daddr,
-                                                        &fl6->saddr);
-
-append:
-       return skb_append_datato_frags(sk, skb, getfrag, from,
-                                      (length - transhdrlen));
-}
-
 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
 {
@@ -1385,19 +1322,6 @@ emsgsize:
         */
 
        cork->length += length;
-       if ((((length + (skb ? skb->len : headersize)) > mtu) ||
-            (skb && skb_is_gso(skb))) &&
-           (sk->sk_protocol == IPPROTO_UDP) &&
-           (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-           (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
-               err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
-                                         hh_len, fragheaderlen, exthdrlen,
-                                         transhdrlen, mtu, flags, fl6);
-               if (err)
-                       goto error;
-               return 0;
-       }
-
        if (!skb)
                goto alloc_new_skb;
 
index 486c230..79444a4 100644 (file)
@@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
        .priority       =       100,
 };
 
-static bool is_vti6_tunnel(const struct net_device *dev)
-{
-       return dev->netdev_ops == &vti6_netdev_ops;
-}
-
-static int vti6_device_event(struct notifier_block *unused,
-                            unsigned long event, void *ptr)
-{
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-       struct ip6_tnl *t = netdev_priv(dev);
-
-       if (!is_vti6_tunnel(dev))
-               return NOTIFY_DONE;
-
-       switch (event) {
-       case NETDEV_DOWN:
-               if (!net_eq(t->net, dev_net(dev)))
-                       xfrm_garbage_collect(t->net);
-               break;
-       }
-       return NOTIFY_DONE;
-}
-
-static struct notifier_block vti6_notifier_block __read_mostly = {
-       .notifier_call = vti6_device_event,
-};
-
 /**
  * vti6_tunnel_init - register protocol and reserve needed resources
  *
@@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void)
        const char *msg;
        int err;
 
-       register_netdevice_notifier(&vti6_notifier_block);
-
        msg = "tunnel device";
        err = register_pernet_device(&vti6_net_ops);
        if (err < 0)
@@ -1216,7 +1187,6 @@ xfrm_proto_ah_failed:
 xfrm_proto_esp_failed:
        unregister_pernet_device(&vti6_net_ops);
 pernet_dev_failed:
-       unregister_netdevice_notifier(&vti6_notifier_block);
        pr_err("vti6 init: failed to register %s\n", msg);
        return err;
 }
@@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void)
        xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
        xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
        unregister_pernet_device(&vti6_net_ops);
-       unregister_netdevice_notifier(&vti6_notifier_block);
 }
 
 module_init(vti6_tunnel_init);
index a2267f8..455fd4e 100644 (file)
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
-                                        netdev_features_t features)
+static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb,
+                                          netdev_features_t features)
 {
        struct sk_buff *segs = ERR_PTR(-EINVAL);
-       unsigned int mss;
-       unsigned int unfrag_ip6hlen, unfrag_len;
-       struct frag_hdr *fptr;
-       u8 *packet_start, *prevhdr;
-       u8 nexthdr;
-       u8 frag_hdr_sz = sizeof(struct frag_hdr);
-       __wsum csum;
-       int tnl_hlen;
-       int err;
-
-       mss = skb_shinfo(skb)->gso_size;
-       if (unlikely(skb->len <= mss))
-               goto out;
-
-       if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-               /* Packet is from an untrusted source, reset gso_segs. */
-
-               skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
-               /* Set the IPv6 fragment id if not set yet */
-               if (!skb_shinfo(skb)->ip6_frag_id)
-                       ipv6_proxy_select_ident(dev_net(skb->dev), skb);
-
-               segs = NULL;
-               goto out;
-       }
 
        if (skb->encapsulation && skb_shinfo(skb)->gso_type &
            (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
                segs = skb_udp_tunnel_segment(skb, features, true);
-       else {
-               const struct ipv6hdr *ipv6h;
-               struct udphdr *uh;
-
-               if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-                       goto out;
-
-               /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
-                * do checksum of UDP packets sent as multiple IP fragments.
-                */
-
-               uh = udp_hdr(skb);
-               ipv6h = ipv6_hdr(skb);
-
-               uh->check = 0;
-               csum = skb_checksum(skb, 0, skb->len, 0);
-               uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
-                                         &ipv6h->daddr, csum);
-               if (uh->check == 0)
-                       uh->check = CSUM_MANGLED_0;
-
-               skb->ip_summed = CHECKSUM_NONE;
-
-               /* If there is no outer header we can fake a checksum offload
-                * due to the fact that we have already done the checksum in
-                * software prior to segmenting the frame.
-                */
-               if (!skb->encap_hdr_csum)
-                       features |= NETIF_F_HW_CSUM;
-
-               /* Check if there is enough headroom to insert fragment header. */
-               tnl_hlen = skb_tnl_header_len(skb);
-               if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
-                       if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
-                               goto out;
-               }
-
-               /* Find the unfragmentable header and shift it left by frag_hdr_sz
-                * bytes to insert fragment header.
-                */
-               err = ip6_find_1stfragopt(skb, &prevhdr);
-               if (err < 0)
-                       return ERR_PTR(err);
-               unfrag_ip6hlen = err;
-               nexthdr = *prevhdr;
-               *prevhdr = NEXTHDR_FRAGMENT;
-               unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
-                            unfrag_ip6hlen + tnl_hlen;
-               packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
-               memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
-
-               SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
-               skb->mac_header -= frag_hdr_sz;
-               skb->network_header -= frag_hdr_sz;
-
-               fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
-               fptr->nexthdr = nexthdr;
-               fptr->reserved = 0;
-               if (!skb_shinfo(skb)->ip6_frag_id)
-                       ipv6_proxy_select_ident(dev_net(skb->dev), skb);
-               fptr->identification = skb_shinfo(skb)->ip6_frag_id;
-
-               /* Fragment the skb. ipv6 header and the remaining fields of the
-                * fragment header are updated in ipv6_gso_segment()
-                */
-               segs = skb_segment(skb, features);
-       }
 
-out:
        return segs;
 }
 
@@ -169,7 +75,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 
 static const struct net_offload udpv6_offload = {
        .callbacks = {
-               .gso_segment    =       udp6_ufo_fragment,
+               .gso_segment    =       udp6_tunnel_segment,
                .gro_receive    =       udp6_gro_receive,
                .gro_complete   =       udp6_gro_complete,
        },
index 79651bc..f44b25a 100644 (file)
@@ -214,14 +214,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
        }
 }
 
-static inline int xfrm6_garbage_collect(struct dst_ops *ops)
-{
-       struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
-
-       xfrm_garbage_collect_deferred(net);
-       return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
-}
-
 static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
                              struct sk_buff *skb, u32 mtu)
 {
@@ -279,14 +271,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm6_dst_ops_template = {
        .family =               AF_INET6,
-       .gc =                   xfrm6_garbage_collect,
        .update_pmtu =          xfrm6_update_pmtu,
        .redirect =             xfrm6_redirect,
        .cow_metrics =          dst_cow_metrics_generic,
        .destroy =              xfrm6_dst_destroy,
        .ifdown =               xfrm6_dst_ifdown,
        .local_out =            __ip6_local_out,
-       .gc_thresh =            INT_MAX,
+       .gc_thresh =            32768,
 };
 
 static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
index ca9d3ae..10d7133 100644 (file)
@@ -2398,8 +2398,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 
 out:
        xfrm_pol_put(xp);
-       if (err == 0)
-               xfrm_garbage_collect(net);
        return err;
 }
 
@@ -2650,8 +2648,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 
 out:
        xfrm_pol_put(xp);
-       if (delete && err == 0)
-               xfrm_garbage_collect(net);
        return err;
 }
 
@@ -2751,8 +2747,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
        int err, err2;
 
        err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
-       if (!err)
-               xfrm_garbage_collect(net);
        err2 = unicast_flush_resp(sk, hdr);
        if (err || err2) {
                if (err == -ESRCH) /* empty table - old silent behavior */
index 45fe8c8..f6e229b 100644 (file)
@@ -335,8 +335,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct dp_upcall_info *upcall_info,
                                 uint32_t cutlen)
 {
-       unsigned short gso_type = skb_shinfo(skb)->gso_type;
-       struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;
 
@@ -347,21 +345,9 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
        if (segs == NULL)
                return -EINVAL;
 
-       if (gso_type & SKB_GSO_UDP) {
-               /* The initial flow key extracted by ovs_flow_key_extract()
-                * in this case is for a first fragment, so we need to
-                * properly mark later fragments.
-                */
-               later_key = *key;
-               later_key.ip.frag = OVS_FRAG_TYPE_LATER;
-       }
-
        /* Queue all of the segments. */
        skb = segs;
        do {
-               if (gso_type & SKB_GSO_UDP && skb != segs)
-                       key = &later_key;
-
                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
                if (err)
                        break;
index 3f76cb7..8c94cef 100644 (file)
@@ -72,8 +72,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
                           const struct sk_buff *skb)
 {
        struct flow_stats *stats;
-       int node = numa_node_id();
-       int cpu = smp_processor_id();
+       unsigned int cpu = smp_processor_id();
        int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
        stats = rcu_dereference(flow->stats[cpu]);
@@ -108,7 +107,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
                                                              __GFP_THISNODE |
                                                              __GFP_NOWARN |
                                                              __GFP_NOMEMALLOC,
-                                                             node);
+                                                             numa_node_id());
                                if (likely(new_stats)) {
                                        new_stats->used = jiffies;
                                        new_stats->packet_count = 1;
@@ -118,6 +117,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 
                                        rcu_assign_pointer(flow->stats[cpu],
                                                           new_stats);
+                                       cpumask_set_cpu(cpu, &flow->cpu_used_mask);
                                        goto unlock;
                                }
                        }
@@ -145,7 +145,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
        memset(ovs_stats, 0, sizeof(*ovs_stats));
 
        /* We open code this to make sure cpu 0 is always considered */
-       for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+       for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
                struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
                if (stats) {
@@ -169,7 +169,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
        int cpu;
 
        /* We open code this to make sure cpu 0 is always considered */
-       for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+       for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
                struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
                if (stats) {
@@ -584,8 +584,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
                        key->ip.frag = OVS_FRAG_TYPE_LATER;
                        return 0;
                }
-               if (nh->frag_off & htons(IP_MF) ||
-                       skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+               if (nh->frag_off & htons(IP_MF))
                        key->ip.frag = OVS_FRAG_TYPE_FIRST;
                else
                        key->ip.frag = OVS_FRAG_TYPE_NONE;
@@ -701,9 +700,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 
                if (key->ip.frag == OVS_FRAG_TYPE_LATER)
                        return 0;
-               if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-                       key->ip.frag = OVS_FRAG_TYPE_FIRST;
-
                /* Transport layer. */
                if (key->ip.proto == NEXTHDR_TCP) {
                        if (tcphdr_ok(skb)) {
index a9bc1c8..1875bba 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/flex_array.h>
+#include <linux/cpumask.h>
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
 #include <net/dst_metadata.h>
@@ -219,6 +220,7 @@ struct sw_flow {
                                         */
        struct sw_flow_key key;
        struct sw_flow_id id;
+       struct cpumask cpu_used_mask;
        struct sw_flow_mask *mask;
        struct sw_flow_actions __rcu *sf_acts;
        struct flow_stats __rcu *stats[]; /* One for each CPU.  First one
index ea7a807..80ea2a7 100644 (file)
@@ -98,6 +98,8 @@ struct sw_flow *ovs_flow_alloc(void)
 
        RCU_INIT_POINTER(flow->stats[0], stats);
 
+       cpumask_set_cpu(0, &flow->cpu_used_mask);
+
        return flow;
 err:
        kmem_cache_free(flow_cache, flow);
@@ -141,7 +143,7 @@ static void flow_free(struct sw_flow *flow)
        if (flow->sf_acts)
                ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
        /* We open code this to make sure cpu 0 is always considered */
-       for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask))
+       for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
                if (flow->stats[cpu])
                        kmem_cache_free(flow_stats_cache,
                                        (struct flow_stats __force *)flow->stats[cpu]);
index 008bb34..e7303f6 100644 (file)
@@ -177,8 +177,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 #define BLK_PLUS_PRIV(sz_of_priv) \
        (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
 
-#define PGV_FROM_VMALLOC 1
-
 #define BLOCK_STATUS(x)        ((x)->hdr.bh1.block_status)
 #define BLOCK_NUM_PKTS(x)      ((x)->hdr.bh1.num_pkts)
 #define BLOCK_O2FP(x)          ((x)->hdr.bh1.offset_to_first_pkt)
index 50a3789..005bca6 100644 (file)
@@ -374,13 +374,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
        if (!cp->cp_transport_data)
                return;
 
-       rds_conn_path_drop(cp);
-       flush_work(&cp->cp_down_w);
-
        /* make sure lingering queued work won't try to ref the conn */
        cancel_delayed_work_sync(&cp->cp_send_w);
        cancel_delayed_work_sync(&cp->cp_recv_w);
 
+       rds_conn_path_drop(cp, true);
+       flush_work(&cp->cp_down_w);
+
        /* tear down queued messages */
        list_for_each_entry_safe(rm, rtmp,
                                 &cp->cp_send_queue,
@@ -664,9 +664,13 @@ void rds_conn_exit(void)
 /*
  * Force a disconnect
  */
-void rds_conn_path_drop(struct rds_conn_path *cp)
+void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
 {
        atomic_set(&cp->cp_state, RDS_CONN_ERROR);
+
+       if (!destroy && cp->cp_conn->c_destroy_in_prog)
+               return;
+
        queue_work(rds_wq, &cp->cp_down_w);
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@@ -674,7 +678,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop);
 void rds_conn_drop(struct rds_connection *conn)
 {
        WARN_ON(conn->c_trans->t_mp_capable);
-       rds_conn_path_drop(&conn->c_path[0]);
+       rds_conn_path_drop(&conn->c_path[0], false);
 }
 EXPORT_SYMBOL_GPL(rds_conn_drop);
 
@@ -706,5 +710,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
        vprintk(fmt, ap);
        va_end(ap);
 
-       rds_conn_path_drop(cp);
+       rds_conn_path_drop(cp, false);
 }
index 516bcc8..3382695 100644 (file)
@@ -700,7 +700,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
 void rds_conn_shutdown(struct rds_conn_path *cpath);
 void rds_conn_destroy(struct rds_connection *conn);
 void rds_conn_drop(struct rds_connection *conn);
-void rds_conn_path_drop(struct rds_conn_path *cpath);
+void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
 void rds_conn_connect_if_down(struct rds_connection *conn);
 void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
 void rds_for_each_conn_info(struct socket *sock, unsigned int len,
index 431404d..6b7ee71 100644 (file)
@@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
                        continue;
 
                /* reconnect with new parameters */
-               rds_conn_path_drop(tc->t_cpath);
+               rds_conn_path_drop(tc->t_cpath, false);
        }
        spin_unlock_irq(&rds_tcp_conn_lock);
 }
index cbe08a1..46f74da 100644 (file)
@@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk)
                if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
                    rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
                                             RDS_CONN_ERROR)) {
-                       rds_conn_path_drop(cp);
+                       rds_conn_path_drop(cp, false);
                } else {
                        rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
                }
                break;
        case TCP_CLOSE_WAIT:
        case TCP_CLOSE:
-               rds_conn_path_drop(cp);
+               rds_conn_path_drop(cp, false);
        default:
                break;
        }
index 0d8616a..dc860d1 100644 (file)
@@ -157,7 +157,7 @@ out:
                                        "returned %d, "
                                        "disconnecting and reconnecting\n",
                                        &conn->c_faddr, cp->cp_index, ret);
-                               rds_conn_path_drop(cp);
+                               rds_conn_path_drop(cp, false);
                        }
                }
        }
index 2852bc1..f121daa 100644 (file)
@@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
                                "current state is %d\n",
                                __func__,
                                atomic_read(&cp->cp_state));
-               rds_conn_path_drop(cp);
+               rds_conn_path_drop(cp, false);
                return;
        }
 
index 3317a2f..67afc12 100644 (file)
@@ -231,9 +231,6 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
        const struct iphdr *iph;
        u16 ul;
 
-       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-               return 1;
-
        /*
         * Support both UDP and UDPLITE checksum algorithms, Don't use
         * udph->len to get the real length without any protocol check,
@@ -287,9 +284,6 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
        const struct ipv6hdr *ip6h;
        u16 ul;
 
-       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-               return 1;
-
        /*
         * Support both UDP and UDPLITE checksum algorithms, Don't use
         * udph->len to get the real length without any protocol check,
index e001b01..00667c5 100644 (file)
@@ -185,9 +185,9 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1,
  *    are called the two key vectors.
  */
 static struct sctp_auth_bytes *sctp_auth_make_key_vector(
-                       sctp_random_param_t *random,
-                       sctp_chunks_param_t *chunks,
-                       sctp_hmac_algo_param_t *hmacs,
+                       struct sctp_random_param *random,
+                       struct sctp_chunks_param *chunks,
+                       struct sctp_hmac_algo_param *hmacs,
                        gfp_t gfp)
 {
        struct sctp_auth_bytes *new;
@@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector(
                                    gfp_t gfp)
 {
        return sctp_auth_make_key_vector(
-                                   (sctp_random_param_t *)asoc->c.auth_random,
-                                   (sctp_chunks_param_t *)asoc->c.auth_chunks,
-                                   (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs,
-                                   gfp);
+                       (struct sctp_random_param *)asoc->c.auth_random,
+                       (struct sctp_chunks_param *)asoc->c.auth_chunks,
+                       (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp);
 }
 
 /* Make a key vector based on peer's parameters */
index 0e86f98..3d506b2 100644 (file)
@@ -73,13 +73,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
                 * variables.  There are arrays that we encode directly
                 * into parameters to make the rest of the operations easier.
                 */
-               auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) +
-                               sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
+               auth_hmacs = kzalloc(sizeof(*auth_hmacs) +
+                                    sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
                if (!auth_hmacs)
                        goto nomem;
 
-               auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) +
-                                       SCTP_NUM_CHUNK_TYPES, gfp);
+               auth_chunks = kzalloc(sizeof(*auth_chunks) +
+                                     SCTP_NUM_CHUNK_TYPES, gfp);
                if (!auth_chunks)
                        goto nomem;
 
index 2a186b2..107d7c9 100644 (file)
@@ -497,7 +497,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr,
 static int sctp_v6_to_addr_param(const union sctp_addr *addr,
                                 union sctp_addr_param *param)
 {
-       int length = sizeof(sctp_ipv6addr_param_t);
+       int length = sizeof(struct sctp_ipv6addr_param);
 
        param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
        param->v6.param_hdr.length = htons(length);
index 989a900..852556d 100644 (file)
@@ -292,7 +292,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr,
 static int sctp_v4_to_addr_param(const union sctp_addr *addr,
                                 union sctp_addr_param *param)
 {
-       int length = sizeof(sctp_ipv4addr_param_t);
+       int length = sizeof(struct sctp_ipv4addr_param);
 
        param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS;
        param->v4.param_hdr.length = htons(length);
index 6110447..0b36e96 100644 (file)
@@ -223,10 +223,10 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
        struct sctp_chunk *retval = NULL;
        int num_types, addrs_len = 0;
        struct sctp_sock *sp;
-       sctp_supported_addrs_param_t sat;
+       struct sctp_supported_addrs_param sat;
        __be16 types[2];
-       sctp_adaptation_ind_param_t aiparam;
-       sctp_supported_ext_param_t ext_param;
+       struct sctp_adaptation_ind_param aiparam;
+       struct sctp_supported_ext_param ext_param;
        int num_ext = 0;
        __u8 extensions[4];
        struct sctp_paramhdr *auth_chunks = NULL,
@@ -305,8 +305,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 
        /* If we have any extensions to report, account for that */
        if (num_ext)
-               chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
-                                      num_ext);
+               chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
 
        /* RFC 2960 3.3.2 Initiation (INIT) (1)
         *
@@ -348,10 +347,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
         */
        if (num_ext) {
                ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
-               ext_param.param_hdr.length =
-                           htons(sizeof(sctp_supported_ext_param_t) + num_ext);
-               sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
-                               &ext_param);
+               ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+               sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
                sctp_addto_param(retval, num_ext, extensions);
        }
 
@@ -393,8 +390,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
        sctp_cookie_param_t *cookie;
        int cookie_len;
        size_t chunksize;
-       sctp_adaptation_ind_param_t aiparam;
-       sctp_supported_ext_param_t ext_param;
+       struct sctp_adaptation_ind_param aiparam;
+       struct sctp_supported_ext_param ext_param;
        int num_ext = 0;
        __u8 extensions[4];
        struct sctp_paramhdr *auth_chunks = NULL,
@@ -468,8 +465,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
        }
 
        if (num_ext)
-               chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
-                                      num_ext);
+               chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
 
        /* Now allocate and fill out the chunk.  */
        retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@ -495,10 +491,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
                sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
        if (num_ext) {
                ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
-               ext_param.param_hdr.length =
-                           htons(sizeof(sctp_supported_ext_param_t) + num_ext);
-               sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
-                                &ext_param);
+               ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+               sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
                sctp_addto_param(retval, num_ext, extensions);
        }
        if (asoc->peer.prsctp_capable)
@@ -3153,7 +3147,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
                case SCTP_PARAM_ERR_CAUSE:
                        break;
                case SCTP_PARAM_IPV4_ADDRESS:
-                       if (length != sizeof(sctp_ipv4addr_param_t))
+                       if (length != sizeof(struct sctp_ipv4addr_param))
                                return false;
                        /* ensure there is only one addr param and it's in the
                         * beginning of addip_hdr params, or we reject it.
@@ -3163,7 +3157,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
                        addr_param_seen = true;
                        break;
                case SCTP_PARAM_IPV6_ADDRESS:
-                       if (length != sizeof(sctp_ipv6addr_param_t))
+                       if (length != sizeof(struct sctp_ipv6addr_param))
                                return false;
                        if (param.v != addip->addip_hdr.params)
                                return false;
index b2a74c3..ae4c48c 100644 (file)
@@ -2336,13 +2336,12 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
                                                 void *arg,
                                                 sctp_cmd_seq_t *commands)
 {
-       struct sctp_chunk *chunk = arg;
-       u32 stale;
-       sctp_cookie_preserve_param_t bht;
-       sctp_errhdr_t *err;
-       struct sctp_chunk *reply;
-       struct sctp_bind_addr *bp;
        int attempts = asoc->init_err_counter + 1;
+       struct sctp_chunk *chunk = arg, *reply;
+       struct sctp_cookie_preserve_param bht;
+       struct sctp_bind_addr *bp;
+       sctp_errhdr_t *err;
+       u32 stale;
 
        if (attempts > asoc->max_init_attempts) {
                sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
index 7b52a38..5c53f22 100644 (file)
@@ -1528,26 +1528,13 @@ static inline bool too_many_unix_fds(struct task_struct *p)
        return false;
 }
 
-#define MAX_RECURSION_LEVEL 4
-
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
        int i;
-       unsigned char max_level = 0;
 
        if (too_many_unix_fds(current))
                return -ETOOMANYREFS;
 
-       for (i = scm->fp->count - 1; i >= 0; i--) {
-               struct sock *sk = unix_get_socket(scm->fp->fp[i]);
-
-               if (sk)
-                       max_level = max(max_level,
-                                       unix_sk(sk)->recursion_level);
-       }
-       if (unlikely(max_level > MAX_RECURSION_LEVEL))
-               return -ETOOMANYREFS;
-
        /*
         * Need to duplicate file references for the sake of garbage
         * collection.  Otherwise a socket in the fps might become a
@@ -1559,7 +1546,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 
        for (i = scm->fp->count - 1; i >= 0; i--)
                unix_inflight(scm->fp->user, scm->fp->fp[i]);
-       return max_level;
+       return 0;
 }
 
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1649,7 +1636,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
        struct sk_buff *skb;
        long timeo;
        struct scm_cookie scm;
-       int max_level;
        int data_len = 0;
        int sk_locked;
 
@@ -1701,7 +1687,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
        err = unix_scm_to_skb(&scm, skb, true);
        if (err < 0)
                goto out_free;
-       max_level = err + 1;
 
        skb_put(skb, len - data_len);
        skb->data_len = data_len;
@@ -1819,8 +1804,6 @@ restart_locked:
                __net_timestamp(skb);
        maybe_add_creds(skb, sock, other);
        skb_queue_tail(&other->sk_receive_queue, skb);
-       if (max_level > unix_sk(other)->recursion_level)
-               unix_sk(other)->recursion_level = max_level;
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
@@ -1855,7 +1838,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
        int sent = 0;
        struct scm_cookie scm;
        bool fds_sent = false;
-       int max_level;
        int data_len;
 
        wait_for_unix_gc();
@@ -1905,7 +1887,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                        kfree_skb(skb);
                        goto out_err;
                }
-               max_level = err + 1;
                fds_sent = true;
 
                skb_put(skb, size - data_len);
@@ -1925,8 +1906,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 
                maybe_add_creds(skb, sock, other);
                skb_queue_tail(&other->sk_receive_queue, skb);
-               if (max_level > unix_sk(other)->recursion_level)
-                       unix_sk(other)->recursion_level = max_level;
                unix_state_unlock(other);
                other->sk_data_ready(other);
                sent += size;
@@ -2324,7 +2303,6 @@ redo:
                last_len = last ? last->len : 0;
 again:
                if (skb == NULL) {
-                       unix_sk(sk)->recursion_level = 0;
                        if (copied >= target)
                                goto unlock;
 
index 5f7e8bf..5cd7a24 100644 (file)
@@ -153,6 +153,7 @@ static int xfrm_dev_register(struct net_device *dev)
 
 static int xfrm_dev_unregister(struct net_device *dev)
 {
+       xfrm_policy_cache_flush();
        return NOTIFY_DONE;
 }
 
@@ -175,8 +176,7 @@ static int xfrm_dev_down(struct net_device *dev)
        if (dev->features & NETIF_F_HW_ESP)
                xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-       xfrm_garbage_collect(dev_net(dev));
-
+       xfrm_policy_cache_flush();
        return NOTIFY_DONE;
 }
 
index ff61d85..06c3bf7 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/cache.h>
+#include <linux/cpu.h>
 #include <linux/audit.h>
 #include <net/dst.h>
 #include <net/flow.h>
@@ -44,6 +45,8 @@ struct xfrm_flo {
        u8 flags;
 };
 
+static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
+static struct work_struct *xfrm_pcpu_work __read_mostly;
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
                                                __read_mostly;
@@ -246,36 +249,6 @@ expired:
        xfrm_pol_put(xp);
 }
 
-static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
-{
-       struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-       if (unlikely(pol->walk.dead))
-               flo = NULL;
-       else
-               xfrm_pol_hold(pol);
-
-       return flo;
-}
-
-static int xfrm_policy_flo_check(struct flow_cache_object *flo)
-{
-       struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-       return !pol->walk.dead;
-}
-
-static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
-{
-       xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
-}
-
-static const struct flow_cache_ops xfrm_policy_fc_ops = {
-       .get = xfrm_policy_flo_get,
-       .check = xfrm_policy_flo_check,
-       .delete = xfrm_policy_flo_delete,
-};
-
 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
  * SPD calls.
  */
@@ -298,7 +271,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
                                (unsigned long)policy);
                setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
                            (unsigned long)policy);
-               policy->flo.ops = &xfrm_policy_fc_ops;
        }
        return policy;
 }
@@ -798,7 +770,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
        else
                hlist_add_head(&policy->bydst, chain);
        __xfrm_policy_link(policy, dir);
-       atomic_inc(&net->xfrm.flow_cache_genid);
 
        /* After previous checking, family can either be AF_INET or AF_INET6 */
        if (policy->family == AF_INET)
@@ -1004,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
        }
        if (!cnt)
                err = -ESRCH;
+       else
+               xfrm_policy_cache_flush();
 out:
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        return err;
@@ -1175,7 +1148,7 @@ fail:
 }
 
 static struct xfrm_policy *
-__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
+xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_policy *pol;
@@ -1187,61 +1160,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
        return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 }
 
-static int flow_to_policy_dir(int dir)
-{
-       if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-           XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-           XFRM_POLICY_FWD == FLOW_DIR_FWD)
-               return dir;
-
-       switch (dir) {
-       default:
-       case FLOW_DIR_IN:
-               return XFRM_POLICY_IN;
-       case FLOW_DIR_OUT:
-               return XFRM_POLICY_OUT;
-       case FLOW_DIR_FWD:
-               return XFRM_POLICY_FWD;
-       }
-}
-
-static struct flow_cache_object *
-xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
-                  u8 dir, struct flow_cache_object *old_obj, void *ctx)
-{
-       struct xfrm_policy *pol;
-
-       if (old_obj)
-               xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
-
-       pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
-       if (IS_ERR_OR_NULL(pol))
-               return ERR_CAST(pol);
-
-       /* Resolver returns two references:
-        * one for cache and one for caller of flow_cache_lookup() */
-       xfrm_pol_hold(pol);
-
-       return &pol->flo;
-}
-
-static inline int policy_to_flow_dir(int dir)
-{
-       if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-           XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-           XFRM_POLICY_FWD == FLOW_DIR_FWD)
-               return dir;
-       switch (dir) {
-       default:
-       case XFRM_POLICY_IN:
-               return FLOW_DIR_IN;
-       case XFRM_POLICY_OUT:
-               return FLOW_DIR_OUT;
-       case XFRM_POLICY_FWD:
-               return FLOW_DIR_FWD;
-       }
-}
-
 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
                                                 const struct flowi *fl, u16 family)
 {
@@ -1261,7 +1179,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
                        }
                        err = security_xfrm_policy_lookup(pol->security,
                                                      fl->flowi_secid,
-                                                     policy_to_flow_dir(dir));
+                                                     dir);
                        if (!err) {
                                if (!xfrm_pol_hold_rcu(pol))
                                        goto again;
@@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family)
        return tos;
 }
 
-static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
-{
-       struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-       struct dst_entry *dst = &xdst->u.dst;
-
-       if (xdst->route == NULL) {
-               /* Dummy bundle - if it has xfrms we were not
-                * able to build bundle as template resolution failed.
-                * It means we need to try again resolving. */
-               if (xdst->num_xfrms > 0)
-                       return NULL;
-       } else if (dst->flags & DST_XFRM_QUEUE) {
-               return NULL;
-       } else {
-               /* Real bundle */
-               if (stale_bundle(dst))
-                       return NULL;
-       }
-
-       dst_hold(dst);
-       return flo;
-}
-
-static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
-{
-       struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-       struct dst_entry *dst = &xdst->u.dst;
-
-       if (!xdst->route)
-               return 0;
-       if (stale_bundle(dst))
-               return 0;
-
-       return 1;
-}
-
-static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
-{
-       struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-       struct dst_entry *dst = &xdst->u.dst;
-
-       /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-       dst->obsolete = DST_OBSOLETE_DEAD;
-       dst_release_immediate(dst);
-}
-
-static const struct flow_cache_ops xfrm_bundle_fc_ops = {
-       .get = xfrm_bundle_flo_get,
-       .check = xfrm_bundle_flo_check,
-       .delete = xfrm_bundle_flo_delete,
-};
-
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
        const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
                struct dst_entry *dst = &xdst->u.dst;
 
                memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
-               xdst->flo.ops = &xfrm_bundle_fc_ops;
        } else
                xdst = ERR_PTR(-ENOBUFS);
 
@@ -1840,6 +1705,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 
 }
 
+static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
+{
+       this_cpu_write(xfrm_last_dst, xdst);
+       if (old)
+               dst_release(&old->u.dst);
+}
+
+static void __xfrm_pcpu_work_fn(void)
+{
+       struct xfrm_dst *old;
+
+       old = this_cpu_read(xfrm_last_dst);
+       if (old && !xfrm_bundle_ok(old))
+               xfrm_last_dst_update(NULL, old);
+}
+
+static void xfrm_pcpu_work_fn(struct work_struct *work)
+{
+       local_bh_disable();
+       rcu_read_lock();
+       __xfrm_pcpu_work_fn();
+       rcu_read_unlock();
+       local_bh_enable();
+}
+
+void xfrm_policy_cache_flush(void)
+{
+       struct xfrm_dst *old;
+       bool found = 0;
+       int cpu;
+
+       local_bh_disable();
+       rcu_read_lock();
+       for_each_possible_cpu(cpu) {
+               old = per_cpu(xfrm_last_dst, cpu);
+               if (old && !xfrm_bundle_ok(old)) {
+                       if (smp_processor_id() == cpu) {
+                               __xfrm_pcpu_work_fn();
+                               continue;
+                       }
+                       found = true;
+                       break;
+               }
+       }
+
+       rcu_read_unlock();
+       local_bh_enable();
+
+       if (!found)
+               return;
+
+       get_online_cpus();
+
+       for_each_possible_cpu(cpu) {
+               bool bundle_release;
+
+               rcu_read_lock();
+               old = per_cpu(xfrm_last_dst, cpu);
+               bundle_release = old && !xfrm_bundle_ok(old);
+               rcu_read_unlock();
+
+               if (!bundle_release)
+                       continue;
+
+               if (cpu_online(cpu)) {
+                       schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
+                       continue;
+               }
+
+               rcu_read_lock();
+               old = per_cpu(xfrm_last_dst, cpu);
+               if (old && !xfrm_bundle_ok(old)) {
+                       per_cpu(xfrm_last_dst, cpu) = NULL;
+                       dst_release(&old->u.dst);
+               }
+               rcu_read_unlock();
+       }
+
+       put_online_cpus();
+}
+
+static bool xfrm_pol_dead(struct xfrm_dst *xdst)
+{
+       unsigned int num_pols = xdst->num_pols;
+       unsigned int pol_dead = 0, i;
+
+       for (i = 0; i < num_pols; i++)
+               pol_dead |= xdst->pols[i]->walk.dead;
+
+       /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
+       if (pol_dead)
+               xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
+
+       return pol_dead;
+}
+
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
                               const struct flowi *fl, u16 family,
@@ -1847,10 +1808,22 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 {
        struct net *net = xp_net(pols[0]);
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+       struct xfrm_dst *xdst, *old;
        struct dst_entry *dst;
-       struct xfrm_dst *xdst;
        int err;
 
+       xdst = this_cpu_read(xfrm_last_dst);
+       if (xdst &&
+           xdst->u.dst.dev == dst_orig->dev &&
+           xdst->num_pols == num_pols &&
+           !xfrm_pol_dead(xdst) &&
+           memcmp(xdst->pols, pols,
+                  sizeof(struct xfrm_policy *) * num_pols) == 0) {
+               dst_hold(&xdst->u.dst);
+               return xdst;
+       }
+
+       old = xdst;
        /* Try to instantiate a bundle */
        err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
        if (err <= 0) {
@@ -1871,6 +1844,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
        xdst->policy_genid = atomic_read(&pols[0]->genid);
 
+       atomic_set(&xdst->u.dst.__refcnt, 2);
+       xfrm_last_dst_update(xdst, old);
+
        return xdst;
 }
 
@@ -2051,86 +2027,39 @@ free_dst:
        goto out;
 }
 
-static struct flow_cache_object *
-xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
-                  struct flow_cache_object *oldflo, void *ctx)
+static struct xfrm_dst *
+xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
 {
-       struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-       struct xfrm_dst *xdst, *new_xdst;
-       int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
-
-       /* Check if the policies from old bundle are usable */
-       xdst = NULL;
-       if (oldflo) {
-               xdst = container_of(oldflo, struct xfrm_dst, flo);
-               num_pols = xdst->num_pols;
-               num_xfrms = xdst->num_xfrms;
-               pol_dead = 0;
-               for (i = 0; i < num_pols; i++) {
-                       pols[i] = xdst->pols[i];
-                       pol_dead |= pols[i]->walk.dead;
-               }
-               if (pol_dead) {
-                       /* Mark DST_OBSOLETE_DEAD to fail the next
-                        * xfrm_dst_check()
-                        */
-                       xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-                       dst_release_immediate(&xdst->u.dst);
-                       xdst = NULL;
-                       num_pols = 0;
-                       num_xfrms = 0;
-                       oldflo = NULL;
-               }
-       }
+       int num_pols = 0, num_xfrms = 0, err;
+       struct xfrm_dst *xdst;
 
        /* Resolve policies to use if we couldn't get them from
         * previous cache entry */
-       if (xdst == NULL) {
-               num_pols = 1;
-               pols[0] = __xfrm_policy_lookup(net, fl, family,
-                                              flow_to_policy_dir(dir));
-               err = xfrm_expand_policies(fl, family, pols,
+       num_pols = 1;
+       pols[0] = xfrm_policy_lookup(net, fl, family, dir);
+       err = xfrm_expand_policies(fl, family, pols,
                                           &num_pols, &num_xfrms);
-               if (err < 0)
-                       goto inc_error;
-               if (num_pols == 0)
-                       return NULL;
-               if (num_xfrms <= 0)
-                       goto make_dummy_bundle;
-       }
+       if (err < 0)
+               goto inc_error;
+       if (num_pols == 0)
+               return NULL;
+       if (num_xfrms <= 0)
+               goto make_dummy_bundle;
 
-       new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+       xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
                                                  xflo->dst_orig);
-       if (IS_ERR(new_xdst)) {
-               err = PTR_ERR(new_xdst);
+       if (IS_ERR(xdst)) {
+               err = PTR_ERR(xdst);
                if (err != -EAGAIN)
                        goto error;
-               if (oldflo == NULL)
-                       goto make_dummy_bundle;
-               dst_hold(&xdst->u.dst);
-               return oldflo;
-       } else if (new_xdst == NULL) {
+               goto make_dummy_bundle;
+       } else if (xdst == NULL) {
                num_xfrms = 0;
-               if (oldflo == NULL)
-                       goto make_dummy_bundle;
-               xdst->num_xfrms = 0;
-               dst_hold(&xdst->u.dst);
-               return oldflo;
-       }
-
-       /* Kill the previous bundle */
-       if (xdst) {
-               /* The policies were stolen for newly generated bundle */
-               xdst->num_pols = 0;
-               /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-               xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-               dst_release_immediate(&xdst->u.dst);
+               goto make_dummy_bundle;
        }
 
-       /* We do need to return one reference for original caller */
-       dst_hold(&new_xdst->u.dst);
-       return &new_xdst->flo;
+       return xdst;
 
 make_dummy_bundle:
        /* We found policies, but there's no bundles to instantiate:
@@ -2146,17 +2075,12 @@ make_dummy_bundle:
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 
        dst_hold(&xdst->u.dst);
-       return &xdst->flo;
+       return xdst;
 
 inc_error:
        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 error:
-       if (xdst != NULL) {
-               /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-               xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-               dst_release_immediate(&xdst->u.dst);
-       } else
-               xfrm_pols_put(pols, num_pols);
+       xfrm_pols_put(pols, num_pols);
        return ERR_PTR(err);
 }
 
@@ -2187,11 +2111,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                              const struct sock *sk, int flags)
 {
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-       struct flow_cache_object *flo;
        struct xfrm_dst *xdst;
        struct dst_entry *dst, *route;
        u16 family = dst_orig->ops->family;
-       u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+       u8 dir = XFRM_POLICY_OUT;
        int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
 
        dst = NULL;
@@ -2242,15 +2165,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                    !net->xfrm.policy_count[XFRM_POLICY_OUT])
                        goto nopol;
 
-               flo = flow_cache_lookup(net, fl, family, dir,
-                                       xfrm_bundle_lookup, &xflo);
-               if (flo == NULL)
+               xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
+               if (xdst == NULL)
                        goto nopol;
-               if (IS_ERR(flo)) {
-                       err = PTR_ERR(flo);
+               if (IS_ERR(xdst)) {
+                       err = PTR_ERR(xdst);
                        goto dropdst;
                }
-               xdst = container_of(flo, struct xfrm_dst, flo);
 
                num_pols = xdst->num_pols;
                num_xfrms = xdst->num_xfrms;
@@ -2449,12 +2370,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
        int pi;
        int reverse;
        struct flowi fl;
-       u8 fl_dir;
        int xerr_idx = -1;
 
        reverse = dir & ~XFRM_POLICY_MASK;
        dir &= XFRM_POLICY_MASK;
-       fl_dir = policy_to_flow_dir(dir);
 
        if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
@@ -2486,16 +2405,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
                }
        }
 
-       if (!pol) {
-               struct flow_cache_object *flo;
-
-               flo = flow_cache_lookup(net, &fl, family, fl_dir,
-                                       xfrm_policy_lookup, NULL);
-               if (IS_ERR_OR_NULL(flo))
-                       pol = ERR_CAST(flo);
-               else
-                       pol = container_of(flo, struct xfrm_policy, flo);
-       }
+       if (!pol)
+               pol = xfrm_policy_lookup(net, &fl, family, dir);
 
        if (IS_ERR(pol)) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2641,11 +2552,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
         * notice.  That's what we are validating here via the
         * stale_bundle() check.
         *
-        * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will
-        * be marked on it.
         * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will
         * be marked on it.
-        * Both will force stable_bundle() to fail on any xdst bundle with
+        * This will force stale_bundle() to fail on any xdst bundle with
         * this dst linked in it.
         */
        if (dst->obsolete < 0 && !stale_bundle(dst))
@@ -2685,18 +2594,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
        return dst;
 }
 
-void xfrm_garbage_collect(struct net *net)
-{
-       flow_cache_flush(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect);
-
-void xfrm_garbage_collect_deferred(struct net *net)
-{
-       flow_cache_flush_deferred(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect_deferred);
-
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
        do {
@@ -3034,14 +2931,9 @@ static int __net_init xfrm_net_init(struct net *net)
        rv = xfrm_sysctl_init(net);
        if (rv < 0)
                goto out_sysctl;
-       rv = flow_cache_init(net);
-       if (rv < 0)
-               goto out;
 
        return 0;
 
-out:
-       xfrm_sysctl_fini(net);
 out_sysctl:
        xfrm_policy_fini(net);
 out_policy:
@@ -3054,7 +2946,6 @@ out_statistics:
 
 static void __net_exit xfrm_net_exit(struct net *net)
 {
-       flow_cache_fini(net);
        xfrm_sysctl_fini(net);
        xfrm_policy_fini(net);
        xfrm_state_fini(net);
@@ -3068,7 +2959,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
-       flow_cache_hp_init();
+       int i;
+
+       xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
+                                      GFP_KERNEL);
+       BUG_ON(!xfrm_pcpu_work);
+
+       for (i = 0; i < NR_CPUS; i++)
+               INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
+
        register_pernet_subsys(&xfrm_net_ops);
        seqcount_init(&xfrm_policy_hash_generation);
        xfrm_input_init();
index 6c0956d..82cbbce 100644 (file)
@@ -724,9 +724,10 @@ restart:
                        }
                }
        }
-       if (cnt)
+       if (cnt) {
                err = 0;
-
+               xfrm_policy_cache_flush();
+       }
 out:
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
        return err;
index 2be4c6a..1b539b7 100644 (file)
@@ -1815,8 +1815,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 out:
        xfrm_pol_put(xp);
-       if (delete && err == 0)
-               xfrm_garbage_collect(net);
        return err;
 }
 
@@ -2027,7 +2025,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
                        return 0;
                return err;
        }
-       xfrm_garbage_collect(net);
 
        c.data.type = type;
        c.event = nlh->nlmsg_type;
index 87246be..770d46c 100644 (file)
@@ -37,6 +37,8 @@ hostprogs-y += xdp_tx_iptunnel
 hostprogs-y += test_map_in_map
 hostprogs-y += per_socket_stats_example
 hostprogs-y += load_sock_ops
+hostprogs-y += xdp_redirect
+hostprogs-y += xdp_redirect_map
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -78,6 +80,8 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
 xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
 test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
 per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
+xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
+xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -119,6 +123,8 @@ always += tcp_bufs_kern.o
 always += tcp_cong_kern.o
 always += tcp_iw_kern.o
 always += tcp_clamp_kern.o
+always += xdp_redirect_kern.o
+always += xdp_redirect_map_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -155,6 +161,8 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf
 HOSTLOADLIBES_lwt_len_hist += -l elf
 HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
 HOSTLOADLIBES_test_map_in_map += -lelf
+HOSTLOADLIBES_xdp_redirect += -lelf
+HOSTLOADLIBES_xdp_redirect_map += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
new file mode 100644 (file)
index 0000000..a34ad45
--- /dev/null
@@ -0,0 +1,81 @@
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") tx_port = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") rxcnt = {
+       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(long),
+       .max_entries = 1,
+};
+
+
+static void swap_src_dst_mac(void *data)
+{
+       unsigned short *p = data;
+       unsigned short dst[3];
+
+       dst[0] = p[0];
+       dst[1] = p[1];
+       dst[2] = p[2];
+       p[0] = p[3];
+       p[1] = p[4];
+       p[2] = p[5];
+       p[3] = dst[0];
+       p[4] = dst[1];
+       p[5] = dst[2];
+}
+
+SEC("xdp_redirect")
+int xdp_redirect_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       struct ethhdr *eth = data;
+       int rc = XDP_DROP;
+       int *ifindex, port = 0;
+       long *value;
+       u32 key = 0;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return rc;
+
+       ifindex = bpf_map_lookup_elem(&tx_port, &port);
+       if (!ifindex)
+               return rc;
+
+       value = bpf_map_lookup_elem(&rxcnt, &key);
+       if (value)
+               *value += 1;
+
+       swap_src_dst_mac(data);
+       return bpf_redirect(*ifindex, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
new file mode 100644 (file)
index 0000000..2faf196
--- /dev/null
@@ -0,0 +1,83 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") tx_port = {
+       .type = BPF_MAP_TYPE_DEVMAP,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 100,
+};
+
+struct bpf_map_def SEC("maps") rxcnt = {
+       .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(long),
+       .max_entries = 1,
+};
+
+
+static void swap_src_dst_mac(void *data)
+{
+       unsigned short *p = data;
+       unsigned short dst[3];
+
+       dst[0] = p[0];
+       dst[1] = p[1];
+       dst[2] = p[2];
+       p[0] = p[3];
+       p[1] = p[4];
+       p[2] = p[5];
+       p[3] = dst[0];
+       p[4] = dst[1];
+       p[5] = dst[2];
+}
+
+SEC("xdp_redirect_map")
+int xdp_redirect_map_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       struct ethhdr *eth = data;
+       int rc = XDP_DROP;
+       int vport, port = 0, m = 0;
+       long *value;
+       u32 key = 0;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return rc;
+
+       /* constant virtual port */
+       vport = 0;
+
+       /* count packet in global counter */
+       value = bpf_map_lookup_elem(&rxcnt, &key);
+       if (value)
+               *value += 1;
+
+       swap_src_dst_mac(data);
+
+       /* send packet out physical port */
+       return bpf_redirect_map(&tx_port, vport, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
new file mode 100644 (file)
index 0000000..a1ad00f
--- /dev/null
@@ -0,0 +1,137 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+static int ifindex_in;
+static int ifindex_out;
+
+static __u32 xdp_flags;
+
+static void int_exit(int sig)
+{
+       set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+       exit(0);
+}
+
+/* sum the per-CPU packet counters and print an aggregate pkt/s rate
+ */
+static void poll_stats(int interval, int ifindex)
+{
+       unsigned int nr_cpus = bpf_num_possible_cpus();
+       __u64 values[nr_cpus], prev[nr_cpus];
+
+       memset(prev, 0, sizeof(prev));
+
+       while (1) {
+               __u64 sum = 0;
+               __u32 key = 0;
+               int i;
+
+               sleep(interval);
+               assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+               for (i = 0; i < nr_cpus; i++)
+                       sum += (values[i] - prev[i]);
+               if (sum)
+                       printf("ifindex %i: %10llu pkt/s\n",
+                              ifindex, sum / interval);
+               memcpy(prev, values, sizeof(values));
+       }
+}
+
+static void usage(const char *prog)
+{
+       fprintf(stderr,
+               "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+               "OPTS:\n"
+               "    -S    use skb-mode\n"
+               "    -N    enforce native mode\n",
+               prog);
+}
+
+
+int main(int argc, char **argv)
+{
+       const char *optstr = "SN";
+       char filename[256];
+       int ret, opt, key = 0;
+
+       while ((opt = getopt(argc, argv, optstr)) != -1) {
+               switch (opt) {
+               case 'S':
+                       xdp_flags |= XDP_FLAGS_SKB_MODE;
+                       break;
+               case 'N':
+                       xdp_flags |= XDP_FLAGS_DRV_MODE;
+                       break;
+               default:
+                       usage(basename(argv[0]));
+                       return 1;
+               }
+       }
+
+       if (optind == argc) {
+               printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+               return 1;
+       }
+
+       ifindex_in = strtoul(argv[optind], NULL, 0);
+       ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+       printf("input: %d output: %d\n", ifindex_in, ifindex_out);
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 1;
+       }
+
+       if (!prog_fd[0]) {
+               printf("load_bpf_file: %s\n", strerror(errno));
+               return 1;
+       }
+
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+
+       if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+               printf("link set xdp fd failed\n");
+               return 1;
+       }
+
+       printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n",
+               map_fd[0], map_fd[1], map_fd[2]);
+
+       /* populate virtual to physical port map */
+       ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
+       if (ret) {
+               perror("bpf_update_elem");
+               goto out;
+       }
+
+       poll_stats(2, ifindex_out);
+
+out:
+       return 0;
+}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
new file mode 100644 (file)
index 0000000..f705a19
--- /dev/null
@@ -0,0 +1,134 @@
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+static int ifindex_in;
+static int ifindex_out;
+
+static __u32 xdp_flags;
+
+static void int_exit(int sig)
+{
+       set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+       exit(0);
+}
+
+/* sum the per-CPU packet counters and print an aggregate pkt/s rate
+ */
+static void poll_stats(int interval, int ifindex)
+{
+       unsigned int nr_cpus = bpf_num_possible_cpus();
+       __u64 values[nr_cpus], prev[nr_cpus];
+
+       memset(prev, 0, sizeof(prev));
+
+       while (1) {
+               __u64 sum = 0;
+               __u32 key = 0;
+               int i;
+
+               sleep(interval);
+               assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+               for (i = 0; i < nr_cpus; i++)
+                       sum += (values[i] - prev[i]);
+               if (sum)
+                       printf("ifindex %i: %10llu pkt/s\n",
+                              ifindex, sum / interval);
+               memcpy(prev, values, sizeof(values));
+       }
+}
+
+static void usage(const char *prog)
+{
+       fprintf(stderr,
+               "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+               "OPTS:\n"
+               "    -S    use skb-mode\n"
+               "    -N    enforce native mode\n",
+               prog);
+}
+
+
+int main(int argc, char **argv)
+{
+       const char *optstr = "SN";
+       char filename[256];
+       int ret, opt, key = 0;
+
+       while ((opt = getopt(argc, argv, optstr)) != -1) {
+               switch (opt) {
+               case 'S':
+                       xdp_flags |= XDP_FLAGS_SKB_MODE;
+                       break;
+               case 'N':
+                       xdp_flags |= XDP_FLAGS_DRV_MODE;
+                       break;
+               default:
+                       usage(basename(argv[0]));
+                       return 1;
+               }
+       }
+
+       if (optind == argc) {
+               printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+               return 1;
+       }
+
+       ifindex_in = strtoul(argv[optind], NULL, 0);
+       ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+       printf("input: %d output: %d\n", ifindex_in, ifindex_out);
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 1;
+       }
+
+       if (!prog_fd[0]) {
+               printf("load_bpf_file: %s\n", strerror(errno));
+               return 1;
+       }
+
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+
+       if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+               printf("link set xdp fd failed\n");
+               return 1;
+       }
+
+       /* bpf redirect port */
+       ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
+       if (ret) {
+               perror("bpf_update_elem");
+               goto out;
+       }
+
+       poll_stats(2, ifindex_out);
+
+out:
+       return 0;
+}
index 1450f85..36a7ce9 100644 (file)
@@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void)
        struct net *net;
 
        rtnl_lock();
-       for_each_net(net) {
-               atomic_inc(&net->xfrm.flow_cache_genid);
+       for_each_net(net)
                rt_genid_bump_all(net);
-       }
        rtnl_unlock();
 }
 #else
index 89b2506..80f1028 100755 (executable)
@@ -211,6 +211,30 @@ function create_bond {
 
        echo $'\nBond name:' $bondname
 
+       if [ $distro == ubuntu ]
+       then
+               local mainfn=$cfgdir/interfaces
+               local s="^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+${bondname}"
+
+               grep -E "$s" $mainfn
+               if [ $? -eq 0 ]
+               then
+                       echo "WARNING: ${bondname} has been configured already"
+                       return
+               fi
+       elif [ $distro == redhat ] || [ $distro == suse ]
+       then
+               local fn=$cfgdir/ifcfg-$bondname
+               if [ -f $fn ]
+               then
+                       echo "WARNING: ${bondname} has been configured already"
+                       return
+               fi
+       else
+               echo "Unsupported Distro: ${distro}"
+               return
+       fi
+
        echo configuring $primary
        create_eth_cfg_pri_$distro $primary $bondname
 
@@ -219,8 +243,6 @@ function create_bond {
 
        echo creating: $bondname with primary slave: $primary
        create_bond_cfg_$distro $bondname $primary $secondary
-
-       let bondcnt=bondcnt+1
 }
 
 for (( i=0; i < $eth_cnt-1; i++ ))
@@ -228,5 +250,6 @@ do
         if [ -n "${list_match[$i]}" ]
         then
                create_bond ${list_eth[$i]} ${list_match[$i]}
+               let bondcnt=bondcnt+1
         fi
 done
index d50ac34..acbd605 100644 (file)
@@ -38,6 +38,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
        (void *) BPF_FUNC_clone_redirect;
 static int (*bpf_redirect)(int ifindex, int flags) =
        (void *) BPF_FUNC_redirect;
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+       (void *) BPF_FUNC_redirect_map;
 static int (*bpf_perf_event_output)(void *ctx, void *map,
                                    unsigned long long flags, void *data,
                                    int size) =
index 79601c8..36d6ac3 100644 (file)
@@ -438,6 +438,21 @@ static void test_arraymap_percpu_many_keys(void)
        close(fd);
 }
 
+static void test_devmap(int task, void *data)
+{
+       int next_key, fd;
+       __u32 key, value;
+
+       fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
+                           2, 0);
+       if (fd < 0) {
+               printf("Failed to create arraymap '%s'!\n", strerror(errno));
+               exit(1);
+       }
+
+       close(fd);
+}
+
 #define MAP_SIZE (32 * 1024)
 
 static void test_map_large(void)