Merge tag 'zonefs-6.5-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 12 Aug 2023 01:35:56 +0000 (18:35 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 12 Aug 2023 01:35:56 +0000 (18:35 -0700)
Pull zonefs fix from Damien Le Moal:

 - The switch to using iomap for executing a direct synchronous write to
   sequential files using a zone append BIO overlooked cases where the
   BIO built by iomap is too large and needs splitting, which is not
   allowed with zone append.

   Fix this by using regular write commands instead. The use of zone
   append commands will be reintroduced later with proper support from
   iomap.

* tag 'zonefs-6.5-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs:
  zonefs: fix synchronous direct writes to sequential files
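The fix described in the pull message above boils down to one constraint: a zone append BIO must be issued as a single BIO, so a synchronous direct write larger than the device's zone-append limit cannot be submitted that way and has to fall back to a regular write issued at the zone's write pointer. The following is a minimal, self-contained userspace sketch of that size check only; the helper name and the 64 KiB limit are assumptions made for the example and are not taken from the zonefs code.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Illustrative only: the block layer cannot split a zone append BIO, so
 * a write larger than the (hypothetical) per-BIO zone-append limit below
 * must be issued as a regular write at the zone write pointer instead.
 */
#define SECTOR_SIZE            512UL
#define MAX_ZONE_APPEND_BYTES  (128UL * SECTOR_SIZE)   /* assumed queue limit */

static bool can_use_zone_append(unsigned long write_bytes)
{
	/* Zone append is only safe if the whole write fits in one BIO. */
	return write_bytes <= MAX_ZONE_APPEND_BYTES;
}

int main(void)
{
	unsigned long sizes[] = { 4096, 65536, 1048576 };

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%8lu bytes -> %s\n", sizes[i],
		       can_use_zone_append(sizes[i]) ?
		       "zone append" : "regular write (append would need a split)");
	return 0;
}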

261 files changed:
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/ABI/testing/sysfs-platform-hidma
Documentation/ABI/testing/sysfs-platform-hidma-mgmt
Documentation/admin-guide/hw-vuln/gather_data_sampling.rst [new file with mode: 0644]
Documentation/admin-guide/hw-vuln/index.rst
Documentation/admin-guide/hw-vuln/srso.rst [new file with mode: 0644]
Documentation/admin-guide/kernel-parameters.txt
MAINTAINERS
arch/arm64/include/asm/el2_setup.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/nvhe/ffa.c
arch/arm64/kvm/hyp/nvhe/switch.c
arch/parisc/Kconfig.debug
arch/parisc/boot/compressed/misc.c
arch/parisc/include/asm/dma.h
arch/parisc/include/asm/ftrace.h
arch/parisc/include/asm/spinlock.h
arch/parisc/include/asm/spinlock_types.h
arch/parisc/kernel/firmware.c
arch/parisc/kernel/ftrace.c
arch/parisc/kernel/parisc_ksyms.c
arch/parisc/kernel/pci-dma.c
arch/parisc/kernel/pdt.c
arch/parisc/kernel/perf.c
arch/parisc/kernel/processor.c
arch/parisc/kernel/setup.c
arch/parisc/kernel/signal.c
arch/parisc/kernel/sys_parisc.c
arch/parisc/kernel/syscall.S
arch/parisc/kernel/unaligned.c
arch/parisc/lib/ucmpdi2.c
arch/parisc/mm/fault.c
arch/parisc/mm/init.c
arch/parisc/mm/ioremap.c
arch/riscv/include/asm/cacheflush.h
arch/riscv/include/asm/mmio.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/vmalloc.h
arch/riscv/kernel/cpu.c
arch/riscv/kernel/elf_kexec.c
arch/riscv/kernel/smp.c
arch/riscv/mm/init.c
arch/riscv/mm/kasan_init.c
arch/x86/Kconfig
arch/x86/include/asm/acpi.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/processor.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cpu.h
arch/x86/kernel/traps.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/cpuid.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/svm/vmenter.S
arch/x86/kvm/x86.c
arch/x86/lib/retpoline.S
block/blk-core.c
block/blk-iocost.c
block/fops.c
drivers/accel/ivpu/ivpu_gem.c
drivers/acpi/resource.c
drivers/base/cpu.c
drivers/block/zram/zram_drv.c
drivers/char/tpm/tpm-chip.c
drivers/char/tpm/tpm_crb.c
drivers/char/tpm/tpm_tis.c
drivers/cpufreq/amd-pstate.c
drivers/cpuidle/cpuidle-psci-domain.c
drivers/cpuidle/dt_idle_genpd.c
drivers/cpuidle/dt_idle_genpd.h
drivers/dma/Kconfig
drivers/dma/idxd/device.c
drivers/dma/mcf-edma.c
drivers/dma/owl-dma.c
drivers/dma/pl330.c
drivers/dma/xilinx/xdma.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
drivers/gpu/drm/bridge/ite-it6505.c
drivers/gpu/drm/bridge/lontium-lt9611.c
drivers/gpu/drm/drm_gem_shmem_helper.c
drivers/gpu/drm/nouveau/nouveau_connector.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/hwmon/aquacomputer_d5next.c
drivers/hwmon/pmbus/bel-pfe.c
drivers/isdn/mISDN/dsp.h
drivers/isdn/mISDN/dsp_cmx.c
drivers/isdn/mISDN/dsp_core.c
drivers/media/platform/qcom/venus/hfi_cmds.c
drivers/mmc/host/moxart-mmc.c
drivers/mmc/host/sdhci_f_sdh30.c
drivers/net/bonding/bond_main.c
drivers/net/dsa/ocelot/felix.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/iavf/iavf_ethtool.c
drivers/net/ethernet/intel/iavf/iavf_fdir.c
drivers/net/ethernet/intel/igc/igc.h
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/marvell/prestera/prestera_router.c
drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/sriov.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
drivers/net/ethernet/microsoft/mana/mana_en.c
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/macsec.c
drivers/net/phy/at803x.c
drivers/net/tun.c
drivers/net/vxlan/vxlan_vnifilter.c
drivers/net/wireguard/allowedips.c
drivers/net/wireguard/selftest/allowedips.c
drivers/net/wireless/ath/ath12k/wmi.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/realtek/rtw89/mac.c
drivers/net/xen-netback/netback.c
drivers/nvme/host/core.c
drivers/nvme/host/ioctl.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/parisc/sba_iommu.c
drivers/pci/bus.c
drivers/pci/controller/Kconfig
drivers/pci/controller/dwc/pcie-designware-host.c
drivers/pci/controller/dwc/pcie-designware.c
drivers/pci/controller/dwc/pcie-designware.h
drivers/pci/hotplug/acpiphp_glue.c
drivers/pci/of.c
fs/gfs2/file.c
fs/gfs2/trans.c
fs/nilfs2/inode.c
fs/nilfs2/segment.c
fs/nilfs2/the_nilfs.h
fs/proc/kcore.c
fs/smb/server/smb2misc.c
fs/smb/server/smb2pdu.c
fs/vboxsf/shfl_hostintf.h
include/linux/bio.h
include/linux/blkdev.h
include/linux/cpu.h
include/linux/skmsg.h
include/linux/tpm.h
include/net/cfg80211.h
include/net/netfilter/nf_tables.h
include/trace/events/tcp.h
io_uring/io_uring.c
io_uring/openclose.c
kernel/power/hibernate.c
kernel/workqueue.c
lib/Kconfig.debug
lib/scatterlist.c
mm/compaction.c
mm/damon/core.c
mm/hugetlb.c
mm/ksm.c
mm/memory-failure.c
mm/memory.c
mm/swapfile.c
mm/zsmalloc.c
net/8021q/vlan.c
net/core/filter.c
net/core/skmsg.c
net/core/sock.c
net/core/sock_map.c
net/dccp/output.c
net/dccp/proto.c
net/ipv4/ip_tunnel_core.c
net/ipv4/nexthop.c
net/ipv6/ndisc.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_set_hash.c
net/netfilter/nft_set_pipapo.c
net/netfilter/nft_set_rbtree.c
net/packet/af_packet.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_clc.c
net/smc/smc_core.c
net/smc/smc_sysctl.c
net/tls/tls_device.c
net/tls/tls_main.c
net/wireless/nl80211.c
net/xdp/xsk.c
security/keys/sysctl.c
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/msr-index.h
tools/objtool/arch/x86/decode.c
tools/perf/util/machine.c
tools/perf/util/stat-display.c
tools/testing/radix-tree/regression1.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/progs/test_sockmap_listen.c
tools/testing/selftests/cgroup/test_kmem.c
tools/testing/selftests/mm/ksm_tests.c
tools/testing/selftests/net/fib_nexthops.sh
tools/testing/selftests/net/forwarding/bridge_mdb.sh
tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
tools/testing/selftests/net/forwarding/ethtool.sh
tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
tools/testing/selftests/net/forwarding/ethtool_mm.sh
tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
tools/testing/selftests/net/forwarding/lib.sh
tools/testing/selftests/net/forwarding/settings [new file with mode: 0644]
tools/testing/selftests/net/forwarding/tc_actions.sh
tools/testing/selftests/net/forwarding/tc_flower.sh
tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/pmtu.sh
tools/testing/selftests/rseq/Makefile
tools/testing/selftests/rseq/rseq.c

index ecd585c..77942ee 100644 (file)
@@ -513,17 +513,18 @@ Description:      information about CPUs heterogeneity.
                cpu_capacity: capacity of cpuX.
 
 What:          /sys/devices/system/cpu/vulnerabilities
+               /sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+               /sys/devices/system/cpu/vulnerabilities/itlb_multihit
+               /sys/devices/system/cpu/vulnerabilities/l1tf
+               /sys/devices/system/cpu/vulnerabilities/mds
                /sys/devices/system/cpu/vulnerabilities/meltdown
+               /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+               /sys/devices/system/cpu/vulnerabilities/retbleed
+               /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
                /sys/devices/system/cpu/vulnerabilities/spectre_v1
                /sys/devices/system/cpu/vulnerabilities/spectre_v2
-               /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
-               /sys/devices/system/cpu/vulnerabilities/l1tf
-               /sys/devices/system/cpu/vulnerabilities/mds
                /sys/devices/system/cpu/vulnerabilities/srbds
                /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
-               /sys/devices/system/cpu/vulnerabilities/itlb_multihit
-               /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
-               /sys/devices/system/cpu/vulnerabilities/retbleed
 Date:          January 2018
 Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Information about CPU vulnerabilities
index fca40a5..a80aeda 100644 (file)
@@ -2,7 +2,7 @@ What:           /sys/devices/platform/hidma-*/chid
                /sys/devices/platform/QCOM8061:*/chid
 Date:          Dec 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains the ID of the channel within the HIDMA instance.
                It is used to associate a given HIDMA channel with the
index 3b6c5c9..0373745 100644 (file)
@@ -2,7 +2,7 @@ What:           /sys/devices/platform/hidma-mgmt*/chanops/chan*/priority
                /sys/devices/platform/QCOM8060:*/chanops/chan*/priority
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains either 0 or 1 and indicates if the DMA channel is a
                low priority (0) or high priority (1) channel.
@@ -11,7 +11,7 @@ What:         /sys/devices/platform/hidma-mgmt*/chanops/chan*/weight
                /sys/devices/platform/QCOM8060:*/chanops/chan*/weight
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains 0..15 and indicates the weight of the channel among
                equal priority channels during round robin scheduling.
@@ -20,7 +20,7 @@ What:         /sys/devices/platform/hidma-mgmt*/chreset_timeout_cycles
                /sys/devices/platform/QCOM8060:*/chreset_timeout_cycles
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains the platform specific cycle value to wait after a
                reset command is issued. If the value is chosen too short,
@@ -32,7 +32,7 @@ What:         /sys/devices/platform/hidma-mgmt*/dma_channels
                /sys/devices/platform/QCOM8060:*/dma_channels
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains the number of dma channels supported by one instance
                of HIDMA hardware. The value may change from chip to chip.
@@ -41,7 +41,7 @@ What:         /sys/devices/platform/hidma-mgmt*/hw_version_major
                /sys/devices/platform/QCOM8060:*/hw_version_major
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Version number major for the hardware.
 
@@ -49,7 +49,7 @@ What:         /sys/devices/platform/hidma-mgmt*/hw_version_minor
                /sys/devices/platform/QCOM8060:*/hw_version_minor
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Version number minor for the hardware.
 
@@ -57,7 +57,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_rd_xactions
                /sys/devices/platform/QCOM8060:*/max_rd_xactions
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains a value between 0 and 31. Maximum number of
                read transactions that can be issued back to back.
@@ -69,7 +69,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_read_request
                /sys/devices/platform/QCOM8060:*/max_read_request
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Size of each read request. The value needs to be a power
                of two and can be between 128 and 1024.
@@ -78,7 +78,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_wr_xactions
                /sys/devices/platform/QCOM8060:*/max_wr_xactions
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains a value between 0 and 31. Maximum number of
                write transactions that can be issued back to back.
@@ -91,7 +91,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_write_request
                /sys/devices/platform/QCOM8060:*/max_write_request
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Size of each write request. The value needs to be a power
                of two and can be between 128 and 1024.
diff --git a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst
new file mode 100644 (file)
index 0000000..264bfa9
--- /dev/null
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+GDS - Gather Data Sampling
+==========================
+
+Gather Data Sampling is a hardware vulnerability which allows unprivileged
+speculative access to data which was previously stored in vector registers.
+
+Problem
+-------
+When a gather instruction performs loads from memory, different data elements
+are merged into the destination vector register. However, when a gather
+instruction that is transiently executed encounters a fault, stale data from
+architectural or internal vector registers may get transiently forwarded to the
+destination vector register instead. This will allow a malicious attacker to
+infer stale data using typical side channel techniques like cache timing
+attacks. GDS is a purely sampling-based attack.
+
+The attacker uses gather instructions to infer the stale vector register data.
+The victim does not need to do anything special other than use the vector
+registers. The victim does not need to use gather instructions to be
+vulnerable.
+
+Because the buffers are shared between Hyper-Threads, cross Hyper-Thread attacks
+are possible.
+
+Attack scenarios
+----------------
+Without mitigation, GDS can infer stale data across virtually all
+permission boundaries:
+
+       Non-enclaves can infer SGX enclave data
+       Userspace can infer kernel data
+       Guests can infer data from hosts
+       Guest can infer guest from other guests
+       Users can infer data from other users
+
+Because of this, it is important to ensure that the mitigation stays enabled in
+lower-privilege contexts like guests and when running outside SGX enclaves.
+
+The hardware enforces the mitigation for SGX. Likewise, VMMs should ensure
+that guests are not allowed to disable the GDS mitigation. If a host erred and
+allowed this, a guest could theoretically disable GDS mitigation, mount an
+attack, and re-enable it.
+
+Mitigation mechanism
+--------------------
+This issue is mitigated in microcode. The microcode defines the following new
+bits:
+
+ ================================   ===   ============================
+ IA32_ARCH_CAPABILITIES[GDS_CTRL]   R/O   Enumerates GDS vulnerability
+                                          and mitigation support.
+ IA32_ARCH_CAPABILITIES[GDS_NO]     R/O   Processor is not vulnerable.
+ IA32_MCU_OPT_CTRL[GDS_MITG_DIS]    R/W   Disables the mitigation
+                                          0 by default.
+ IA32_MCU_OPT_CTRL[GDS_MITG_LOCK]   R/W   Locks GDS_MITG_DIS=0. Writes
+                                          to GDS_MITG_DIS are ignored
+                                          Can't be cleared once set.
+ ================================   ===   ============================
+
+GDS can also be mitigated on systems that don't have updated microcode by
+disabling AVX. This can be done by setting gather_data_sampling="force" or
+"clearcpuid=avx" on the kernel command-line.
+
+If used, these options will disable AVX use by turning off XSAVE YMM support.
+However, the processor will still enumerate AVX support.  Userspace that
+does not follow proper AVX enumeration to check both AVX *and* XSAVE YMM
+support will break.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The mitigation can be disabled by setting "gather_data_sampling=off" or
+"mitigations=off" on the kernel command line. Not specifying either will default
+to the mitigation being enabled. Specifying "gather_data_sampling=force" will
+use the microcode mitigation when available or disable AVX on affected systems
+where the microcode hasn't been updated to include the mitigation.
+
+GDS System Information
+------------------------
+The kernel provides vulnerability status information through sysfs. For
+GDS this can be accessed by the following sysfs file:
+
+/sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+
+The possible values contained in this file are:
+
+ ============================== =============================================
+ Not affected                   Processor not vulnerable.
+ Vulnerable                     Processor vulnerable and mitigation disabled.
+ Vulnerable: No microcode       Processor vulnerable and microcode is missing
+                                mitigation.
+ Mitigation: AVX disabled,
+ no microcode                   Processor is vulnerable and microcode is missing
+                                mitigation. AVX disabled as mitigation.
+ Mitigation: Microcode          Processor is vulnerable and mitigation is in
+                                effect.
+ Mitigation: Microcode (locked) Processor is vulnerable and mitigation is in
+                                effect and cannot be disabled.
+ Unknown: Dependent on
+ hypervisor status              Running on a virtual guest processor that is
+                                affected but with no way to know if host
+                                processor is mitigated or vulnerable.
+ ============================== =============================================
+
+GDS Default mitigation
+----------------------
+The updated microcode will enable the mitigation by default. The kernel's
+default action is to leave the mitigation enabled.
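The document above warns that the AVX-disable fallback only turns off XSAVE YMM state while CPUID keeps enumerating AVX, so userspace must check both the AVX feature bit and the OS-enabled YMM state. The sketch below is generic x86 enumeration code written for this note, not part of the merge; it uses the standard CPUID leaf 1 bits plus XGETBV to read XCR0.

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Proper AVX enumeration: the AVX CPUID bit alone is not enough, because
 * the kernel may have disabled XSAVE YMM state (e.g. with
 * gather_data_sampling=force).  Check OSXSAVE + AVX in CPUID.1:ECX and
 * then confirm XMM and YMM state are enabled in XCR0.
 */
static bool avx_usable(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int xcr0_lo, xcr0_hi;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return false;
	if (!(ecx & bit_OSXSAVE) || !(ecx & bit_AVX))
		return false;

	/* XGETBV with ECX=0 reads XCR0; bits 1 and 2 cover XMM and YMM. */
	__asm__ volatile("xgetbv" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
	return (xcr0_lo & 0x6) == 0x6;
}

int main(void)
{
	printf("AVX usable: %s\n", avx_usable() ? "yes" : "no");
	return 0;
}

The mitigation state itself is reported through the sysfs file quoted in the document, /sys/devices/system/cpu/vulnerabilities/gather_data_sampling.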
index e061476..a7d37e1 100644 (file)
@@ -19,3 +19,5 @@ are configurable at compile, boot or run time.
    l1d_flush.rst
    processor_mmio_stale_data.rst
    cross-thread-rsb.rst
+   srso
+   gather_data_sampling.rst
diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst
new file mode 100644 (file)
index 0000000..32eb5e6
--- /dev/null
@@ -0,0 +1,133 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Speculative Return Stack Overflow (SRSO)
+========================================
+
+This is a mitigation for the speculative return stack overflow (SRSO)
+vulnerability found on AMD processors. The mechanism is by now the well
+known scenario of poisoning CPU functional units - the Branch Target
+Buffer (BTB) and Return Address Predictor (RAP) in this case - and then
+tricking the elevated privilege domain (the kernel) into leaking
+sensitive data.
+
+AMD CPUs predict RET instructions using a Return Address Predictor (aka
+Return Address Stack/Return Stack Buffer). In some cases, a non-architectural
+CALL instruction (i.e., an instruction predicted to be a CALL but is
+not actually a CALL) can create an entry in the RAP which may be used
+to predict the target of a subsequent RET instruction.
+
+The specific circumstances that lead to this varies by microarchitecture
+but the concern is that an attacker can mis-train the CPU BTB to predict
+non-architectural CALL instructions in kernel space and use this to
+control the speculative target of a subsequent kernel RET, potentially
+leading to information disclosure via a speculative side-channel.
+
+The issue is tracked under CVE-2023-20569.
+
+Affected processors
+-------------------
+
+AMD Zen, generations 1-4. That is, all families 0x17 and 0x19. Older
+processors have not been investigated.
+
+System information and options
+------------------------------
+
+First of all, it is required that the latest microcode be loaded for
+mitigations to be effective.
+
+The sysfs file showing SRSO mitigation status is:
+
+  /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow
+
+The possible values in this file are:
+
+ - 'Not affected'               The processor is not vulnerable
+
+ - 'Vulnerable: no microcode'   The processor is vulnerable, no
+                                microcode extending IBPB functionality
+                                to address the vulnerability has been
+                                applied.
+
+ - 'Mitigation: microcode'      Extended IBPB functionality microcode
+                                patch has been applied. It does not
+                                address User->Kernel and Guest->Host
+                                transitions protection but it does
+                                address User->User and VM->VM attack
+                                vectors.
+
+                                (spec_rstack_overflow=microcode)
+
+ - 'Mitigation: safe RET'       Software-only mitigation. It complements
+                                the extended IBPB microcode patch
+                                functionality by addressing User->Kernel 
+                                and Guest->Host transitions protection.
+
+                                Selected by default or by
+                                spec_rstack_overflow=safe-ret
+
+ - 'Mitigation: IBPB'           Similar protection as "safe RET" above
+                                but employs an IBPB barrier on privilege
+                                domain crossings (User->Kernel,
+                                Guest->Host).
+
+                                (spec_rstack_overflow=ibpb)
+
+ - 'Mitigation: IBPB on VMEXIT' Mitigation addressing the cloud provider
+                                scenario - the Guest->Host transitions
+                                only.
+
+                                (spec_rstack_overflow=ibpb-vmexit)
+
+In order to exploit the vulnerability, an attacker needs to:
+
+ - gain local access on the machine
+
+ - break kASLR
+
+ - find gadgets in the running kernel in order to use them in the exploit
+
+ - potentially create and pin an additional workload on the sibling
+   thread, depending on the microarchitecture (not necessary on fam 0x19)
+
+ - run the exploit
+
+Considering the performance implications of each mitigation type, the
+default one is 'Mitigation: safe RET' which should take care of most
+attack vectors, including the local User->Kernel one.
+
+As always, the user is advised to keep her/his system up-to-date by
+applying software updates regularly.
+
+The default setting will be reevaluated when needed and especially when
+new attack vectors appear.
+
+As one can surmise, 'Mitigation: safe RET' does come at the cost of some
+performance depending on the workload. If one trusts her/his userspace
+and does not want to suffer the performance impact, one can always
+disable the mitigation with spec_rstack_overflow=off.
+
+Similarly, 'Mitigation: IBPB' is another full mitigation type employing
+an indirect branch prediction barrier after having applied the required
+microcode patch for one's system. This mitigation comes also at
+a performance cost.
+
+Mitigation: safe RET
+--------------------
+
+The mitigation works by ensuring all RET instructions speculate to
+a controlled location, similar to how speculation is controlled in the
+retpoline sequence.  To accomplish this, the __x86_return_thunk forces
+the CPU to mispredict every function return using a 'safe return'
+sequence.
+
+To ensure the safety of this mitigation, the kernel must ensure that the
+safe return sequence is itself free from attacker interference.  In Zen3
+and Zen4, this is accomplished by creating a BTB alias between the
+untraining function srso_untrain_ret_alias() and the safe return
+function srso_safe_ret_alias() which results in evicting a potentially
+poisoned BTB entry and using that safe one for all function returns.
+
+In older Zen1 and Zen2, this is accomplished using a reinterpretation
+technique similar to Retbleed one: srso_untrain_ret() and
+srso_safe_ret().
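As a small usage note for the sysfs interface the SRSO document introduces, the status string can be read with plain file I/O; the sketch below assumes nothing beyond the path quoted above and keeps error handling minimal.

#include <stdio.h>

/*
 * Print the SRSO mitigation status string.  The path comes from the
 * documentation above; everything else is ordinary C file I/O used
 * purely for illustration.
 */
int main(void)
{
	const char *path =
		"/sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow";
	char status[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(status, sizeof(status), f))
		printf("SRSO status: %s", status);
	fclose(f);
	return 0;
}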
index a145799..722b6ec 100644 (file)
                        Format: off | on
                        default: on
 
+       gather_data_sampling=
+                       [X86,INTEL] Control the Gather Data Sampling (GDS)
+                       mitigation.
+
+                       Gather Data Sampling is a hardware vulnerability which
+                       allows unprivileged speculative access to data which was
+                       previously stored in vector registers.
+
+                       This issue is mitigated by default in updated microcode.
+                       The mitigation may have a performance impact but can be
+                       disabled. On systems without the microcode mitigation
+                       disabling AVX serves as a mitigation.
+
+                       force:  Disable AVX to mitigate systems without
+                               microcode mitigation. No effect if the microcode
+                               mitigation is present. Known to cause crashes in
+                               userspace with buggy AVX enumeration.
+
+                       off:    Disable GDS mitigation.
+
        gcov_persist=   [GCOV] When non-zero (default), profiling data for
                        kernel modules is saved and remains accessible via
                        debugfs, even when the module is unloaded/reloaded.
                                Disable all optional CPU mitigations.  This
                                improves system performance, but it may also
                                expose users to several CPU vulnerabilities.
-                               Equivalent to: nopti [X86,PPC]
-                                              if nokaslr then kpti=0 [ARM64]
-                                              nospectre_v1 [X86,PPC]
-                                              nobp=0 [S390]
-                                              nospectre_v2 [X86,PPC,S390,ARM64]
-                                              spectre_v2_user=off [X86]
-                                              spec_store_bypass_disable=off [X86,PPC]
-                                              ssbd=force-off [ARM64]
-                                              nospectre_bhb [ARM64]
+                               Equivalent to: if nokaslr then kpti=0 [ARM64]
+                                              gather_data_sampling=off [X86]
+                                              kvm.nx_huge_pages=off [X86]
                                               l1tf=off [X86]
                                               mds=off [X86]
-                                              tsx_async_abort=off [X86]
-                                              kvm.nx_huge_pages=off [X86]
-                                              srbds=off [X86,INTEL]
+                                              mmio_stale_data=off [X86]
                                               no_entry_flush [PPC]
                                               no_uaccess_flush [PPC]
-                                              mmio_stale_data=off [X86]
+                                              nobp=0 [S390]
+                                              nopti [X86,PPC]
+                                              nospectre_bhb [ARM64]
+                                              nospectre_v1 [X86,PPC]
+                                              nospectre_v2 [X86,PPC,S390,ARM64]
                                               retbleed=off [X86]
+                                              spec_store_bypass_disable=off [X86,PPC]
+                                              spectre_v2_user=off [X86]
+                                              srbds=off [X86,INTEL]
+                                              ssbd=force-off [ARM64]
+                                              tsx_async_abort=off [X86]
 
                                Exceptions:
                                               This does not have any effect on
                        Not specifying this option is equivalent to
                        spectre_v2_user=auto.
 
+       spec_rstack_overflow=
+                       [X86] Control RAS overflow mitigation on AMD Zen CPUs
+
+                       off             - Disable mitigation
+                       microcode       - Enable microcode mitigation only
+                       safe-ret        - Enable sw-only safe RET mitigation (default)
+                       ibpb            - Enable mitigation by issuing IBPB on
+                                         kernel entry
+                       ibpb-vmexit     - Issue IBPB only on VMEXIT
+                                         (cloud-specific mitigation)
+
        spec_store_bypass_disable=
                        [HW] Control Speculative Store Bypass (SSB) Disable mitigation
                        (Speculative Store Bypass vulnerability)
index 0f966f0..8635305 100644 (file)
@@ -2339,7 +2339,7 @@ F:        drivers/phy/mediatek/
 ARM/MICROCHIP (ARM64) SoC support
 M:     Conor Dooley <conor@kernel.org>
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 T:     git https://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git
@@ -2348,7 +2348,7 @@ F:        arch/arm64/boot/dts/microchip/
 ARM/Microchip (AT91) SoC support
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
 M:     Alexandre Belloni <alexandre.belloni@bootlin.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 W:     http://www.linux4sam.org
@@ -3250,7 +3250,7 @@ F:        include/uapi/linux/atm*
 
 ATMEL MACB ETHERNET DRIVER
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 S:     Supported
 F:     drivers/net/ethernet/cadence/
 
@@ -12480,6 +12480,7 @@ F:      net/mctp/
 
 MAPLE TREE
 M:     Liam R. Howlett <Liam.Howlett@oracle.com>
+L:     maple-tree@lists.infradead.org
 L:     linux-mm@kvack.org
 S:     Supported
 F:     Documentation/core-api/maple_tree.rst
@@ -13786,7 +13787,7 @@ F:      Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
 F:     drivers/spi/spi-at91-usart.c
 
 MICROCHIP AUDIO ASOC DRIVERS
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/sound/atmel*
@@ -13809,7 +13810,7 @@ S:      Maintained
 F:     drivers/crypto/atmel-ecc.*
 
 MICROCHIP EIC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/interrupt-controller/microchip,sama7g5-eic.yaml
@@ -13882,7 +13883,7 @@ F:      drivers/video/fbdev/atmel_lcdfb.c
 F:     include/video/atmel_lcdc.h
 
 MICROCHIP MCP16502 PMIC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt
@@ -13909,7 +13910,7 @@ F:      Documentation/devicetree/bindings/mtd/atmel-nand.txt
 F:     drivers/mtd/nand/raw/atmel/*
 
 MICROCHIP OTPC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml
@@ -13948,7 +13949,7 @@ F:      Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
 F:     drivers/fpga/microchip-spi.c
 
 MICROCHIP PWM DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-pwm@vger.kernel.org
 S:     Supported
@@ -13964,7 +13965,7 @@ F:      drivers/iio/adc/at91-sama5d2_adc.c
 F:     include/dt-bindings/iio/adc/at91-sama5d2_adc.h
 
 MICROCHIP SAMA5D2-COMPATIBLE SHUTDOWN CONTROLLER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 S:     Supported
 F:     Documentation/devicetree/bindings/power/reset/atmel,sama5d2-shdwc.yaml
 F:     drivers/power/reset/at91-sama5d2_shdwc.c
@@ -13981,7 +13982,7 @@ S:      Supported
 F:     drivers/spi/spi-atmel.*
 
 MICROCHIP SSC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/misc/atmel-ssc.txt
@@ -14010,7 +14011,7 @@ F:      drivers/usb/gadget/udc/atmel_usba_udc.*
 
 MICROCHIP WILC1000 WIFI DRIVER
 M:     Ajay Singh <ajay.kathat@microchip.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-wireless@vger.kernel.org
 S:     Supported
 F:     drivers/net/wireless/microchip/wilc1000/
@@ -16293,6 +16294,7 @@ F:      drivers/pci/controller/dwc/pci-exynos.c
 PCI DRIVER FOR SYNOPSYS DESIGNWARE
 M:     Jingoo Han <jingoohan1@gmail.com>
 M:     Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+M:     Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -18508,17 +18510,14 @@ RTL8180 WIRELESS DRIVER
 L:     linux-wireless@vger.kernel.org
 S:     Orphan
 W:     https://wireless.wiki.kernel.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
 F:     drivers/net/wireless/realtek/rtl818x/rtl8180/
 
 RTL8187 WIRELESS DRIVER
-M:     Herton Ronaldo Krzesinski <herton@canonical.com>
-M:     Hin-Tak Leung <htl10@users.sourceforge.net>
+M:     Hin-Tak Leung <hintak.leung@gmail.com>
 M:     Larry Finger <Larry.Finger@lwfinger.net>
 L:     linux-wireless@vger.kernel.org
 S:     Maintained
 W:     https://wireless.wiki.kernel.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
 F:     drivers/net/wireless/realtek/rtl818x/rtl8187/
 
 RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
index 8e5ffb5..b7afaa0 100644 (file)
 .Lskip_hcrx_\@:
 .endm
 
+/* Check if running in host at EL2 mode, i.e., (h)VHE. Jump to fail if not. */
+.macro __check_hvhe fail, tmp
+       mrs     \tmp, hcr_el2
+       and     \tmp, \tmp, #HCR_E2H
+       cbz     \tmp, \fail
+.endm
+
 /*
  * Allow Non-secure EL1 and EL0 to access physical timer and counter.
  * This is not necessary for VHE, since the host kernel runs in EL2,
@@ -43,9 +50,7 @@
  */
 .macro __init_el2_timers
        mov     x0, #3                          // Enable EL1 physical timers
-       mrs     x1, hcr_el2
-       and     x1, x1, #HCR_E2H
-       cbz     x1, .LnVHE_\@
+       __check_hvhe .LnVHE_\@, x1
        lsl     x0, x0, #10
 .LnVHE_\@:
        msr     cnthctl_el2, x0
 
 /* Coprocessor traps */
 .macro __init_el2_cptr
-       mrs     x1, hcr_el2
-       and     x1, x1, #HCR_E2H
-       cbz     x1, .LnVHE_\@
+       __check_hvhe .LnVHE_\@, x1
        mov     x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
-       b       .Lset_cptr_\@
+       msr     cpacr_el1, x0
+       b       .Lskip_set_cptr_\@
 .LnVHE_\@:
        mov     x0, #0x33ff
-.Lset_cptr_\@:
        msr     cptr_el2, x0                    // Disable copro. traps to EL2
+.Lskip_set_cptr_\@:
 .endm
 
 /* Disable any fine grained traps */
        check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
 
 .Linit_sve_\@: /* SVE register access */
-       mrs     x0, cptr_el2                    // Disable SVE traps
-       mrs     x1, hcr_el2
-       and     x1, x1, #HCR_E2H
-       cbz     x1, .Lcptr_nvhe_\@
+       __check_hvhe .Lcptr_nvhe_\@, x1
 
-       // VHE case
+       // (h)VHE case
+       mrs     x0, cpacr_el1                   // Disable SVE traps
        orr     x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
-       b       .Lset_cptr_\@
+       msr     cpacr_el1, x0
+       b       .Lskip_set_cptr_\@
 
 .Lcptr_nvhe_\@: // nVHE case
+       mrs     x0, cptr_el2                    // Disable SVE traps
        bic     x0, x0, #CPTR_EL2_TZ
-.Lset_cptr_\@:
        msr     cptr_el2, x0
+.Lskip_set_cptr_\@:
        isb
        mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
        msr_s   SYS_ZCR_EL2, x1                 // length for EL1.
        check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2
 
 .Linit_sme_\@: /* SME register access and priority mapping */
+       __check_hvhe .Lcptr_nvhe_sme_\@, x1
+
+       // (h)VHE case
+       mrs     x0, cpacr_el1                   // Disable SME traps
+       orr     x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN)
+       msr     cpacr_el1, x0
+       b       .Lskip_set_cptr_sme_\@
+
+.Lcptr_nvhe_sme_\@: // nVHE case
        mrs     x0, cptr_el2                    // Disable SME traps
        bic     x0, x0, #CPTR_EL2_TSM
        msr     cptr_el2, x0
+.Lskip_set_cptr_sme_\@:
        isb
 
        mrs     x1, sctlr_el2
index 7d170aa..24e28bb 100644 (file)
@@ -278,7 +278,7 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void);
 asmlinkage void kvm_unexpected_el2_exception(void);
 struct kvm_cpu_context;
 void handle_trap(struct kvm_cpu_context *host_ctxt);
-asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on);
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
 void __noreturn __pkvm_init_finalise(void);
 void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
 void kvm_patch_vector_branch(struct alt_instr *alt,
index efc0b45..3d6725f 100644 (file)
@@ -571,6 +571,14 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
        return test_bit(feature, vcpu->arch.features);
 }
 
+static __always_inline void kvm_write_cptr_el2(u64 val)
+{
+       if (has_vhe() || has_hvhe())
+               write_sysreg(val, cpacr_el1);
+       else
+               write_sysreg(val, cptr_el2);
+}
+
 static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
 {
        u64 val;
@@ -578,8 +586,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
        if (has_vhe()) {
                val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
                       CPACR_EL1_ZEN_EL1EN);
+               if (cpus_have_final_cap(ARM64_SME))
+                       val |= CPACR_EL1_SMEN_EL1EN;
        } else if (has_hvhe()) {
                val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+
+               if (!vcpu_has_sve(vcpu) ||
+                   (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED))
+                       val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
+               if (cpus_have_final_cap(ARM64_SME))
+                       val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
        } else {
                val = CPTR_NVHE_EL2_RES1;
 
@@ -597,9 +613,6 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
 {
        u64 val = kvm_get_reset_cptr_el2(vcpu);
 
-       if (has_vhe() || has_hvhe())
-               write_sysreg(val, cpacr_el1);
-       else
-               write_sysreg(val, cptr_el2);
+       kvm_write_cptr_el2(val);
 }
 #endif /* __ARM64_KVM_EMULATE_H__ */
index 72dc53a..d1cb298 100644 (file)
@@ -55,7 +55,7 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
 
 static bool vgic_present, kvm_arm_initialised;
 
-static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized);
 DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
 bool is_kvm_arm_initialised(void)
@@ -1864,18 +1864,24 @@ static void cpu_hyp_reinit(void)
        cpu_hyp_init_features();
 }
 
-static void _kvm_arch_hardware_enable(void *discard)
+static void cpu_hyp_init(void *discard)
 {
-       if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+       if (!__this_cpu_read(kvm_hyp_initialized)) {
                cpu_hyp_reinit();
-               __this_cpu_write(kvm_arm_hardware_enabled, 1);
+               __this_cpu_write(kvm_hyp_initialized, 1);
        }
 }
 
-int kvm_arch_hardware_enable(void)
+static void cpu_hyp_uninit(void *discard)
 {
-       int was_enabled;
+       if (__this_cpu_read(kvm_hyp_initialized)) {
+               cpu_hyp_reset();
+               __this_cpu_write(kvm_hyp_initialized, 0);
+       }
+}
 
+int kvm_arch_hardware_enable(void)
+{
        /*
         * Most calls to this function are made with migration
         * disabled, but not with preemption disabled. The former is
@@ -1884,36 +1890,23 @@ int kvm_arch_hardware_enable(void)
         */
        preempt_disable();
 
-       was_enabled = __this_cpu_read(kvm_arm_hardware_enabled);
-       _kvm_arch_hardware_enable(NULL);
+       cpu_hyp_init(NULL);
 
-       if (!was_enabled) {
-               kvm_vgic_cpu_up();
-               kvm_timer_cpu_up();
-       }
+       kvm_vgic_cpu_up();
+       kvm_timer_cpu_up();
 
        preempt_enable();
 
        return 0;
 }
 
-static void _kvm_arch_hardware_disable(void *discard)
-{
-       if (__this_cpu_read(kvm_arm_hardware_enabled)) {
-               cpu_hyp_reset();
-               __this_cpu_write(kvm_arm_hardware_enabled, 0);
-       }
-}
-
 void kvm_arch_hardware_disable(void)
 {
-       if (__this_cpu_read(kvm_arm_hardware_enabled)) {
-               kvm_timer_cpu_down();
-               kvm_vgic_cpu_down();
-       }
+       kvm_timer_cpu_down();
+       kvm_vgic_cpu_down();
 
        if (!is_protected_kvm_enabled())
-               _kvm_arch_hardware_disable(NULL);
+               cpu_hyp_uninit(NULL);
 }
 
 #ifdef CONFIG_CPU_PM
@@ -1922,16 +1915,16 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
                                    void *v)
 {
        /*
-        * kvm_arm_hardware_enabled is left with its old value over
+        * kvm_hyp_initialized is left with its old value over
         * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
         * re-enable hyp.
         */
        switch (cmd) {
        case CPU_PM_ENTER:
-               if (__this_cpu_read(kvm_arm_hardware_enabled))
+               if (__this_cpu_read(kvm_hyp_initialized))
                        /*
-                        * don't update kvm_arm_hardware_enabled here
-                        * so that the hardware will be re-enabled
+                        * don't update kvm_hyp_initialized here
+                        * so that the hyp will be re-enabled
                         * when we resume. See below.
                         */
                        cpu_hyp_reset();
@@ -1939,8 +1932,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
                return NOTIFY_OK;
        case CPU_PM_ENTER_FAILED:
        case CPU_PM_EXIT:
-               if (__this_cpu_read(kvm_arm_hardware_enabled))
-                       /* The hardware was enabled before suspend. */
+               if (__this_cpu_read(kvm_hyp_initialized))
+                       /* The hyp was enabled before suspend. */
                        cpu_hyp_reinit();
 
                return NOTIFY_OK;
@@ -2021,7 +2014,7 @@ static int __init init_subsystems(void)
        /*
         * Enable hardware so that subsystem initialisation can access EL2.
         */
-       on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
+       on_each_cpu(cpu_hyp_init, NULL, 1);
 
        /*
         * Register CPU lower-power notifier
@@ -2059,7 +2052,7 @@ out:
                hyp_cpu_pm_exit();
 
        if (err || !is_protected_kvm_enabled())
-               on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+               on_each_cpu(cpu_hyp_uninit, NULL, 1);
 
        return err;
 }
@@ -2097,7 +2090,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
         * The stub hypercalls are now disabled, so set our local flag to
         * prevent a later re-init attempt in kvm_arch_hardware_enable().
         */
-       __this_cpu_write(kvm_arm_hardware_enabled, 1);
+       __this_cpu_write(kvm_hyp_initialized, 1);
        preempt_enable();
 
        return ret;
index 4bddb85..34f222a 100644 (file)
@@ -457,6 +457,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
         */
        val &= ~(TCR_HD | TCR_HA);
        write_sysreg_el1(val, SYS_TCR);
+       __kvm_skip_instr(vcpu);
        return true;
 }
 
index 58dcd92..ab4f5d1 100644 (file)
@@ -705,7 +705,20 @@ int hyp_ffa_init(void *pages)
        if (res.a0 == FFA_RET_NOT_SUPPORTED)
                return 0;
 
-       if (res.a0 != FFA_VERSION_1_0)
+       /*
+        * Firmware returns the maximum supported version of the FF-A
+        * implementation. Check that the returned version is
+        * backwards-compatible with the hyp according to the rules in DEN0077A
+        * v1.1 REL0 13.2.1.
+        *
+        * Of course, things are never simple when dealing with firmware. v1.1
+        * broke ABI with v1.0 on several structures, which is itself
+        * incompatible with the aforementioned versioning scheme. The
+        * expectation is that v1.x implementations that do not support the v1.0
+        * ABI return NOT_SUPPORTED rather than a version number, according to
+        * DEN0077A v1.1 REL0 18.6.4.
+        */
+       if (FFA_MAJOR_VERSION(res.a0) != 1)
                return -EOPNOTSUPP;
 
        arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
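The comment added in the hunk above follows the FF-A versioning rule from DEN0077A: the 32-bit version word carries the major number in bits 30:16 and the minor in bits 15:0, and a caller written against major version 1 should accept any 1.x answer (with v1.0-incompatible implementations returning NOT_SUPPORTED instead of a version). Below is a standalone sketch of that check, with macro and function names chosen for the example rather than taken from the hyp code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Version word layout per the FF-A spec: bits 30:16 major, bits 15:0 minor. */
#define FFA_MAJOR_VERSION(v)	(((v) >> 16) & 0x7fff)
#define FFA_MINOR_VERSION(v)	((v) & 0xffff)

/* A major-version-1 caller accepts any 1.x version reported by firmware. */
static bool ffa_version_compatible(uint32_t fw_version)
{
	return FFA_MAJOR_VERSION(fw_version) == 1;
}

int main(void)
{
	uint32_t v1_0 = (1u << 16) | 0;		/* 1.0 */
	uint32_t v1_1 = (1u << 16) | 1;		/* 1.1 */
	uint32_t v2_0 = (2u << 16) | 0;		/* hypothetical 2.0 */

	printf("1.0 compatible: %d\n", ffa_version_compatible(v1_0));
	printf("1.1 compatible: %d\n", ffa_version_compatible(v1_1));
	printf("2.0 compatible: %d\n", ffa_version_compatible(v2_0));
	return 0;
}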
index 0a62710..e89a231 100644 (file)
@@ -63,7 +63,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
                __activate_traps_fpsimd32(vcpu);
        }
 
-       write_sysreg(val, cptr_el2);
+       kvm_write_cptr_el2(val);
        write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
 
        if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
index 1401e4c..bf2b21b 100644 (file)
@@ -2,7 +2,7 @@
 #
 config LIGHTWEIGHT_SPINLOCK_CHECK
        bool "Enable lightweight spinlock checks"
-       depends on SMP && !DEBUG_SPINLOCK
+       depends on DEBUG_KERNEL && SMP && !DEBUG_SPINLOCK
        default y
        help
          Add checks with low performance impact to the spinlock functions
index 7ee49f5..d389359 100644 (file)
@@ -117,7 +117,7 @@ char *strchr(const char *s, int c)
        return NULL;
 }
 
-int puts(const char *s)
+static int puts(const char *s)
 {
        const char *nuline = s;
 
@@ -172,7 +172,7 @@ static int print_num(unsigned long num, int base)
        return 0;
 }
 
-int printf(const char *fmt, ...)
+static int printf(const char *fmt, ...)
 {
        va_list args;
        int i = 0;
@@ -204,13 +204,13 @@ void abort(void)
 }
 
 #undef malloc
-void *malloc(size_t size)
+static void *malloc(size_t size)
 {
        return malloc_gzip(size);
 }
 
 #undef free
-void free(void *ptr)
+static void free(void *ptr)
 {
        return free_gzip(ptr);
 }
@@ -278,7 +278,7 @@ static void parse_elf(void *output)
        free(phdrs);
 }
 
-unsigned long decompress_kernel(unsigned int started_wide,
+asmlinkage unsigned long __visible decompress_kernel(unsigned int started_wide,
                unsigned int command_line,
                const unsigned int rd_start,
                const unsigned int rd_end)
index 9e8c101..582fb5d 100644 (file)
@@ -14,6 +14,8 @@
 #define dma_outb       outb
 #define dma_inb                inb
 
+extern unsigned long pcxl_dma_start;
+
 /*
 ** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
 ** (or rather not merge) DMAs into manageable chunks.
index a7cf0d0..f1cc1ee 100644 (file)
@@ -12,6 +12,10 @@ extern void mcount(void);
 extern unsigned long sys_call_table[];
 
 extern unsigned long return_address(unsigned int);
+struct ftrace_regs;
+extern void ftrace_function_trampoline(unsigned long parent,
+               unsigned long self_addr, unsigned long org_sp_gr3,
+               struct ftrace_regs *fregs);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern void ftrace_caller(void);
index edfcb98..0b326e5 100644 (file)
@@ -7,8 +7,6 @@
 #include <asm/processor.h>
 #include <asm/spinlock_types.h>
 
-#define SPINLOCK_BREAK_INSN    0x0000c006      /* break 6,6 */
-
 static inline void arch_spin_val_check(int lock_val)
 {
        if (IS_ENABLED(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK))
index d659340..efd06a8 100644 (file)
@@ -4,6 +4,10 @@
 
 #define __ARCH_SPIN_LOCK_UNLOCKED_VAL  0x1a46
 
+#define SPINLOCK_BREAK_INSN    0x0000c006      /* break 6,6 */
+
+#ifndef __ASSEMBLY__
+
 typedef struct {
 #ifdef CONFIG_PA20
        volatile unsigned int slock;
@@ -27,6 +31,8 @@ typedef struct {
        volatile unsigned int   counter;
 } arch_rwlock_t;
 
+#endif /* __ASSEMBLY__ */
+
 #define __ARCH_RW_LOCK_UNLOCKED__       0x01000000
 #define __ARCH_RW_LOCK_UNLOCKED         { .lock_mutex = __ARCH_SPIN_LOCK_UNLOCKED, \
                                        .counter = __ARCH_RW_LOCK_UNLOCKED__ }
index 6d1c781..8f37e75 100644 (file)
@@ -74,8 +74,8 @@
 static DEFINE_SPINLOCK(pdc_lock);
 #endif
 
-unsigned long pdc_result[NUM_PDC_RESULT]  __aligned(8);
-unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8);
+static unsigned long pdc_result[NUM_PDC_RESULT]  __aligned(8);
+static unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8);
 
 #ifdef CONFIG_64BIT
 #define WIDE_FIRMWARE 0x1
@@ -334,7 +334,7 @@ int __pdc_cpu_rendezvous(void)
 /**
  * pdc_cpu_rendezvous_lock - Lock PDC while transitioning to rendezvous state
  */
-void pdc_cpu_rendezvous_lock(void)
+void pdc_cpu_rendezvous_lock(void) __acquires(&pdc_lock)
 {
        spin_lock(&pdc_lock);
 }
@@ -342,7 +342,7 @@ void pdc_cpu_rendezvous_lock(void)
 /**
  * pdc_cpu_rendezvous_unlock - Unlock PDC after reaching rendezvous state
  */
-void pdc_cpu_rendezvous_unlock(void)
+void pdc_cpu_rendezvous_unlock(void) __releases(&pdc_lock)
 {
        spin_unlock(&pdc_lock);
 }
index 4d392e4..d1defb9 100644 (file)
@@ -53,7 +53,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
 
 static ftrace_func_t ftrace_func;
 
-void notrace __hot ftrace_function_trampoline(unsigned long parent,
+asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent,
                                unsigned long self_addr,
                                unsigned long org_sp_gr3,
                                struct ftrace_regs *fregs)
index 00297e8..6f0c92e 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/syscalls.h>
+#include <linux/libgcc.h>
 
 #include <linux/string.h>
 EXPORT_SYMBOL(memset);
@@ -92,12 +93,6 @@ EXPORT_SYMBOL($$divI_12);
 EXPORT_SYMBOL($$divI_14);
 EXPORT_SYMBOL($$divI_15);
 
-extern void __ashrdi3(void);
-extern void __ashldi3(void);
-extern void __lshrdi3(void);
-extern void __muldi3(void);
-extern void __ucmpdi2(void);
-
 EXPORT_SYMBOL(__ashrdi3);
 EXPORT_SYMBOL(__ashldi3);
 EXPORT_SYMBOL(__lshrdi3);
index 3f6b507..bf9f192 100644 (file)
@@ -39,7 +39,7 @@ static struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
 static unsigned long pcxl_used_bytes __read_mostly;
 static unsigned long pcxl_used_pages __read_mostly;
 
-extern unsigned long pcxl_dma_start; /* Start of pcxl dma mapping area */
+unsigned long pcxl_dma_start __ro_after_init; /* pcxl dma mapping area start */
 static DEFINE_SPINLOCK(pcxl_res_lock);
 static char    *pcxl_res_map;
 static int     pcxl_res_hint;
@@ -381,7 +381,7 @@ pcxl_dma_init(void)
        pcxl_res_map = (char *)__get_free_pages(GFP_KERNEL,
                                            get_order(pcxl_res_size));
        memset(pcxl_res_map, 0, pcxl_res_size);
-       proc_gsc_root = proc_mkdir("gsc", NULL);
+       proc_gsc_root = proc_mkdir("bus/gsc", NULL);
        if (!proc_gsc_root)
                printk(KERN_WARNING
                        "pcxl_dma_init: Unable to create gsc /proc dir entry\n");
index 0d24735..0f9b3b5 100644 (file)
@@ -354,10 +354,8 @@ static int __init pdt_initcall(void)
                return -ENODEV;
 
        kpdtd_task = kthread_run(pdt_mainloop, NULL, "kpdtd");
-       if (IS_ERR(kpdtd_task))
-               return PTR_ERR(kpdtd_task);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(kpdtd_task);
 }
 
 late_initcall(pdt_initcall);
index 90b04d8..b0f0816 100644 (file)
@@ -57,7 +57,7 @@ struct rdr_tbl_ent {
 static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
 static int perf_enabled __read_mostly;
 static DEFINE_SPINLOCK(perf_lock);
-struct parisc_device *cpu_device __read_mostly;
+static struct parisc_device *cpu_device __read_mostly;
 
 /* RDRs to write for PCX-W */
 static const int perf_rdrs_W[] =
index 00b0df9..762289b 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/pdc.h>
+#include <asm/smp.h>
 #include <asm/pdcpat.h>
 #include <asm/irq.h>           /* for struct irq_region */
 #include <asm/parisc-device.h>
index 573f830..211a4af 100644 (file)
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
-/* Intended for ccio/sba/cpu statistics under /proc/bus/{runway|gsc} */
-struct proc_dir_entry * proc_runway_root __read_mostly = NULL;
-struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
-struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL;
-
 static void __init setup_cmdline(char **cmdline_p)
 {
        extern unsigned int boot_args[];
@@ -196,48 +191,6 @@ const struct seq_operations cpuinfo_op = {
        .show   = show_cpuinfo
 };
 
-static void __init parisc_proc_mkdir(void)
-{
-       /*
-       ** Can't call proc_mkdir() until after proc_root_init() has been
-       ** called by start_kernel(). In other words, this code can't
-       ** live in arch/.../setup.c because start_parisc() calls
-       ** start_kernel().
-       */
-       switch (boot_cpu_data.cpu_type) {
-       case pcxl:
-       case pcxl2:
-               if (NULL == proc_gsc_root)
-               {
-                       proc_gsc_root = proc_mkdir("bus/gsc", NULL);
-               }
-               break;
-        case pcxt_:
-        case pcxu:
-        case pcxu_:
-        case pcxw:
-        case pcxw_:
-        case pcxw2:
-                if (NULL == proc_runway_root)
-                {
-                        proc_runway_root = proc_mkdir("bus/runway", NULL);
-                }
-                break;
-       case mako:
-       case mako2:
-                if (NULL == proc_mckinley_root)
-                {
-                        proc_mckinley_root = proc_mkdir("bus/mckinley", NULL);
-                }
-                break;
-       default:
-               /* FIXME: this was added to prevent the compiler 
-                * complaining about missing pcx, pcxs and pcxt
-                * I'm assuming they have neither gsc nor runway */
-               break;
-       }
-}
-
 static struct resource central_bus = {
        .name   = "Central Bus",
        .start  = F_EXTEND(0xfff80000),
@@ -294,7 +247,6 @@ static int __init parisc_init(void)
 {
        u32 osid = (OS_ID_LINUX << 16);
 
-       parisc_proc_mkdir();
        parisc_init_resources();
        do_device_inventory();                  /* probe for hardware */
 
index f886ff0..e8d27de 100644 (file)
@@ -423,7 +423,7 @@ static void check_syscallno_in_delay_branch(struct pt_regs *regs)
        regs->gr[31] -= 8; /* delayed branching */
 
        /* Get assembler opcode of code in delay branch */
-       uaddr = (unsigned int *) ((regs->gr[31] & ~3) + 4);
+       uaddr = (u32 __user *) ((regs->gr[31] & ~3) + 4);
        err = get_user(opcode, uaddr);
        if (err)
                return;
index ca2d537..9915062 100644 (file)
 #include <linux/elf-randomize.h>
 
 /*
- * Construct an artificial page offset for the mapping based on the virtual
+ * Construct an artificial page offset for the mapping based on the physical
  * address of the kernel file mapping variable.
- * If filp is zero the calculated pgoff value aliases the memory of the given
- * address. This is useful for io_uring where the mapping shall alias a kernel
- * address and a userspace adress where both the kernel and the userspace
- * access the same memory region.
  */
-#define GET_FILP_PGOFF(filp, addr)             \
-       ((filp ? (((unsigned long) filp->f_mapping) >> 8)       \
-                & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL)        \
-         + (addr >> PAGE_SHIFT))
+#define GET_FILP_PGOFF(filp)           \
+       (filp ? (((unsigned long) filp->f_mapping) >> 8)        \
+                & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL)
 
 static unsigned long shared_align_offset(unsigned long filp_pgoff,
                                         unsigned long pgoff)
@@ -117,7 +112,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
        do_color_align = 0;
        if (filp || (flags & MAP_SHARED))
                do_color_align = 1;
-       filp_pgoff = GET_FILP_PGOFF(filp, addr);
+       filp_pgoff = GET_FILP_PGOFF(filp);
 
        if (flags & MAP_FIXED) {
                /* Even MAP_FIXED mappings must reside within TASK_SIZE */
index 1373e51..1f51aa9 100644 (file)
@@ -39,6 +39,7 @@ registers).
 #include <asm/assembly.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
+#include <asm/spinlock_types.h>
 
 #include <linux/linkage.h>
 
@@ -66,6 +67,16 @@ registers).
        stw     \reg1, 0(%sr2,\reg2)
        .endm
 
+       /* raise exception if spinlock content is not zero or
+        * __ARCH_SPIN_LOCK_UNLOCKED_VAL */
+       .macro  spinlock_check spin_val,tmpreg
+#ifdef CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK
+       ldi     __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmpreg
+       andcm,= \spin_val, \tmpreg, %r0
+       .word   SPINLOCK_BREAK_INSN
+#endif
+       .endm
+
        .text
 
        .import syscall_exit,code
@@ -508,7 +519,8 @@ lws_start:
 
 lws_exit_noerror:
        lws_pagefault_enable    %r1,%r21
-       stw,ma  %r20, 0(%sr2,%r20)
+       ldi     __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21
+       stw,ma  %r21, 0(%sr2,%r20)
        ssm     PSW_SM_I, %r0
        b       lws_exit
        copy    %r0, %r21
@@ -521,7 +533,8 @@ lws_wouldblock:
 
 lws_pagefault:
        lws_pagefault_enable    %r1,%r21
-       stw,ma  %r20, 0(%sr2,%r20)
+       ldi     __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21
+       stw,ma  %r21, 0(%sr2,%r20)
        ssm     PSW_SM_I, %r0
        ldo     3(%r0),%r28
        b       lws_exit
@@ -619,6 +632,7 @@ lws_compare_and_swap:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -772,6 +786,7 @@ cas2_lock_start:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -1001,6 +1016,7 @@ atomic_xchg_start:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -1199,6 +1215,7 @@ atomic_store_start:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -1330,7 +1347,7 @@ ENTRY(lws_lock_start)
        /* lws locks */
        .rept 256
        /* Keep locks aligned at 16-bytes */
-       .word 1
+       .word __ARCH_SPIN_LOCK_UNLOCKED_VAL
        .word 0 
        .word 0
        .word 0
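
A C paraphrase of the spinlock_check assembler macro introduced above, sketched under the assumption that CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK is enabled; the __builtin_trap() merely stands in for the SPINLOCK_BREAK_INSN break word:

    #include <asm/spinlock_types.h>

    /* andcm,= computes spin_val & ~__ARCH_SPIN_LOCK_UNLOCKED_VAL and
     * nullifies the following break word when that result is zero, so
     * the trap fires only when the lock word has bits set outside the
     * expected unlocked pattern -- a cheap approximation of "neither 0
     * nor __ARCH_SPIN_LOCK_UNLOCKED_VAL". */
    static inline void spinlock_check_sketch(unsigned long spin_val)
    {
            if (spin_val & ~__ARCH_SPIN_LOCK_UNLOCKED_VAL)
                    __builtin_trap();
    }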
index 8130627..170d0dd 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/signal.h>
 #include <linux/ratelimit.h>
 #include <linux/uaccess.h>
+#include <linux/sysctl.h>
 #include <asm/unaligned.h>
 #include <asm/hardirq.h>
 #include <asm/traps.h>
index 8e6014a..9d8b4db 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/module.h>
+#include <linux/libgcc.h>
 
 union ull_union {
        unsigned long long ull;
@@ -9,7 +10,7 @@ union ull_union {
        } ui;
 };
 
-int __ucmpdi2(unsigned long long a, unsigned long long b)
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
 {
        union ull_union au = {.ull = a};
        union ull_union bu = {.ull = b};
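
The ucmpdi2.c hunk above only shows the prototype switching to the word_type provided by the linux/libgcc.h header it now includes. For context, __ucmpdi2() follows the standard libgcc convention of returning 0, 1 or 2 for less-than, equal and greater-than; a hypothetical stand-alone equivalent, not the parisc implementation itself:

    #include <linux/libgcc.h>

    /* Reference semantics only: unsigned 64-bit three-way compare,
     * 0 if a < b, 1 if a == b, 2 if a > b. */
    static word_type ucmpdi2_reference(unsigned long long a, unsigned long long b)
    {
            if (a < b)
                    return 0;
            if (a > b)
                    return 2;
            return 1;
    }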
index a4c7c76..2fe5b44 100644 (file)
@@ -192,31 +192,31 @@ int fixup_exception(struct pt_regs *regs)
  * For implementation see handle_interruption() in traps.c
  */
 static const char * const trap_description[] = {
-       [1] "High-priority machine check (HPMC)",
-       [2] "Power failure interrupt",
-       [3] "Recovery counter trap",
-       [5] "Low-priority machine check",
-       [6] "Instruction TLB miss fault",
-       [7] "Instruction access rights / protection trap",
-       [8] "Illegal instruction trap",
-       [9] "Break instruction trap",
-       [10] "Privileged operation trap",
-       [11] "Privileged register trap",
-       [12] "Overflow trap",
-       [13] "Conditional trap",
-       [14] "FP Assist Exception trap",
-       [15] "Data TLB miss fault",
-       [16] "Non-access ITLB miss fault",
-       [17] "Non-access DTLB miss fault",
-       [18] "Data memory protection/unaligned access trap",
-       [19] "Data memory break trap",
-       [20] "TLB dirty bit trap",
-       [21] "Page reference trap",
-       [22] "Assist emulation trap",
-       [25] "Taken branch trap",
-       [26] "Data memory access rights trap",
-       [27] "Data memory protection ID trap",
-       [28] "Unaligned data reference trap",
+       [1] =   "High-priority machine check (HPMC)",
+       [2] =   "Power failure interrupt",
+       [3] =   "Recovery counter trap",
+       [5] =   "Low-priority machine check",
+       [6] =   "Instruction TLB miss fault",
+       [7] =   "Instruction access rights / protection trap",
+       [8] =   "Illegal instruction trap",
+       [9] =   "Break instruction trap",
+       [10] =  "Privileged operation trap",
+       [11] =  "Privileged register trap",
+       [12] =  "Overflow trap",
+       [13] =  "Conditional trap",
+       [14] =  "FP Assist Exception trap",
+       [15] =  "Data TLB miss fault",
+       [16] =  "Non-access ITLB miss fault",
+       [17] =  "Non-access DTLB miss fault",
+       [18] =  "Data memory protection/unaligned access trap",
+       [19] =  "Data memory break trap",
+       [20] =  "TLB dirty bit trap",
+       [21] =  "Page reference trap",
+       [22] =  "Assist emulation trap",
+       [25] =  "Taken branch trap",
+       [26] =  "Data memory access rights trap",
+       [27] =  "Data memory protection ID trap",
+       [28] =  "Unaligned data reference trap",
 };
 
 const char *trap_name(unsigned long code)
index 389941c..a088c24 100644 (file)
@@ -523,10 +523,6 @@ void mark_rodata_ro(void)
 void *parisc_vmalloc_start __ro_after_init;
 EXPORT_SYMBOL(parisc_vmalloc_start);
 
-#ifdef CONFIG_PA11
-unsigned long pcxl_dma_start __ro_after_init;
-#endif
-
 void __init mem_init(void)
 {
        /* Do sanity checks on IPC (compat) structures */
index 345ff0b..d7ee1f4 100644 (file)
@@ -27,7 +27,7 @@
  */
 void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
 {
-       void __iomem *addr;
+       uintptr_t addr;
        struct vm_struct *area;
        unsigned long offset, last_addr;
        pgprot_t pgprot;
@@ -79,10 +79,9 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
        if (!area)
                return NULL;
 
-       addr = (void __iomem *) area->addr;
-       if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
-                              phys_addr, pgprot)) {
-               vunmap(addr);
+       addr = (uintptr_t) area->addr;
+       if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
+               vunmap(area->addr);
                return NULL;
        }
 
index 8091b8b..b93ffdd 100644 (file)
@@ -37,6 +37,10 @@ static inline void flush_dcache_page(struct page *page)
 #define flush_icache_user_page(vma, pg, addr, len) \
        flush_icache_mm(vma->vm_mm, 0)
 
+#ifdef CONFIG_64BIT
+#define flush_cache_vmap(start, end)   flush_tlb_kernel_range(start, end)
+#endif
+
 #ifndef CONFIG_SMP
 
 #define flush_icache_all() local_flush_icache_all()
index aff6c33..4c58ee7 100644 (file)
@@ -101,9 +101,9 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
  * Relaxed I/O memory access primitives. These follow the Device memory
  * ordering rules but do not guarantee any ordering relative to Normal memory
  * accesses.  These are defined to order the indicated access (either a read or
- * write) with all other I/O memory accesses. Since the platform specification
- * defines that all I/O regions are strongly ordered on channel 2, no explicit
- * fences are required to enforce this ordering.
+ * write) with all other I/O memory accesses to the same peripheral. Since the
+ * platform specification defines that all I/O regions are strongly ordered on
+ * channel 0, no explicit fences are required to enforce this ordering.
  */
 /* FIXME: These are now the same as asm-generic */
 #define __io_rbr()             do {} while (0)
@@ -125,14 +125,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 #endif
 
 /*
- * I/O memory access primitives. Reads are ordered relative to any
- * following Normal memory access. Writes are ordered relative to any prior
- * Normal memory access.  The memory barriers here are necessary as RISC-V
+ * I/O memory access primitives.  Reads are ordered relative to any following
+ * Normal memory read and delay() loop.  Writes are ordered relative to any
+ * prior Normal memory write.  The memory barriers here are necessary as RISC-V
  * doesn't define any ordering between the memory space and the I/O space.
  */
 #define __io_br()      do {} while (0)
-#define __io_ar(v)     __asm__ __volatile__ ("fence i,r" : : : "memory")
-#define __io_bw()      __asm__ __volatile__ ("fence w,o" : : : "memory")
+#define __io_ar(v)     ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); })
+#define __io_bw()      ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); })
 #define __io_aw()      mmiowb_set_pending()
 
 #define readb(c)       ({ u8  __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
index 75970ee..b5680c9 100644 (file)
@@ -188,6 +188,8 @@ extern struct pt_alloc_ops pt_ops __initdata;
 #define PAGE_KERNEL_IO         __pgprot(_PAGE_IOREMAP)
 
 extern pgd_t swapper_pg_dir[];
+extern pgd_t trampoline_pg_dir[];
+extern pgd_t early_pg_dir[];
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int pmd_present(pmd_t pmd)
index 58d3e44..924d01b 100644 (file)
@@ -3,12 +3,14 @@
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 
+extern bool pgtable_l4_enabled, pgtable_l5_enabled;
+
 #define IOREMAP_MAX_ORDER (PUD_SHIFT)
 
 #define arch_vmap_pud_supported arch_vmap_pud_supported
 static inline bool arch_vmap_pud_supported(pgprot_t prot)
 {
-       return true;
+       return pgtable_l4_enabled || pgtable_l5_enabled;
 }
 
 #define arch_vmap_pmd_supported arch_vmap_pmd_supported
index a2fc952..35b854c 100644 (file)
 #include <asm/smp.h>
 #include <asm/pgtable.h>
 
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+       return phys_id == cpuid_to_hartid_map(cpu);
+}
+
 /*
  * Returns the hart ID of the given device tree node, or -ENODEV if the node
  * isn't an enabled and valid RISC-V hart node.
index 5372b70..c08bb5c 100644 (file)
@@ -281,7 +281,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
                kbuf.buffer = initrd;
                kbuf.bufsz = kbuf.memsz = initrd_len;
                kbuf.buf_align = PAGE_SIZE;
-               kbuf.top_down = false;
+               kbuf.top_down = true;
                kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
                ret = kexec_add_buffer(&kbuf);
                if (ret)
@@ -425,6 +425,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
                 * sym, instead of searching the whole relsec.
                 */
                case R_RISCV_PCREL_HI20:
+               case R_RISCV_CALL_PLT:
                case R_RISCV_CALL:
                        *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
                                 ENCODE_UJTYPE_IMM(val - addr);
index 85bbce0..40420af 100644 (file)
@@ -61,11 +61,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid)
        return -ENOENT;
 }
 
-bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
-{
-       return phys_id == cpuid_to_hartid_map(cpu);
-}
-
 static void ipi_stop(void)
 {
        set_cpu_online(smp_processor_id(), false);
index 9ce5047..e4c35ac 100644 (file)
 #include <linux/kfence.h>
 
 #include <asm/fixmap.h>
-#include <asm/tlbflush.h>
-#include <asm/sections.h>
-#include <asm/soc.h>
 #include <asm/io.h>
-#include <asm/ptdump.h>
 #include <asm/numa.h>
+#include <asm/pgtable.h>
+#include <asm/ptdump.h>
+#include <asm/sections.h>
+#include <asm/soc.h>
+#include <asm/tlbflush.h>
 
 #include "../kernel/head.h"
 
@@ -214,8 +215,13 @@ static void __init setup_bootmem(void)
        memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
        phys_ram_end = memblock_end_of_DRAM();
+
+       /*
+        * Make sure we align the start of the memory on a PMD boundary so that
+        * at worst, we map the linear mapping with PMD mappings.
+        */
        if (!IS_ENABLED(CONFIG_XIP_KERNEL))
-               phys_ram_base = memblock_start_of_DRAM();
+               phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
 
        /*
         * In 64-bit, any use of __va/__pa before this point is wrong as we
index 8fc0efc..a01bc15 100644 (file)
@@ -22,7 +22,6 @@
  * region is not and then we have to go down to the PUD level.
  */
 
-extern pgd_t early_pg_dir[PTRS_PER_PGD];
 pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss;
 pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss;
index 7422db4..e36261b 100644 (file)
@@ -2593,6 +2593,13 @@ config CPU_IBRS_ENTRY
          This mitigates both spectre_v2 and retbleed at great cost to
          performance.
 
+config CPU_SRSO
+       bool "Mitigate speculative RAS overflow on AMD"
+       depends on CPU_SUP_AMD && X86_64 && RETHUNK
+       default y
+       help
+         Enable the SRSO mitigation needed on AMD Zen1-4 machines.
+
 config SLS
        bool "Mitigate Straight-Line-Speculation"
        depends on CC_HAS_SLS && X86_64
@@ -2603,6 +2610,25 @@ config SLS
          against straight line speculation. The kernel image might be slightly
          larger.
 
+config GDS_FORCE_MITIGATION
+       bool "Force GDS Mitigation"
+       depends on CPU_SUP_INTEL
+       default n
+       help
+         Gather Data Sampling (GDS) is a hardware vulnerability which allows
+         unprivileged speculative access to data which was previously stored in
+         vector registers.
+
+         This option is equivalent to setting gather_data_sampling=force on the
+         command line. The microcode mitigation is used if present, otherwise
+         AVX is disabled as a mitigation. On affected systems that are missing
+         the microcode any userspace code that unconditionally uses AVX will
+         break with this option set.
+
+         Setting this option on systems not vulnerable to GDS has no effect.
+
+         If in doubt, say N.
+
 endif
 
 config ARCH_HAS_ADD_PAGES
index 8eb74cf..2888c0e 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/mpspec.h>
 #include <asm/x86_init.h>
 #include <asm/cpufeature.h>
+#include <asm/irq_vectors.h>
 
 #ifdef CONFIG_ACPI_APEI
 # include <asm/pgtable_types.h>
@@ -31,6 +32,7 @@ extern int acpi_skip_timer_override;
 extern int acpi_use_timer_override;
 extern int acpi_fix_pin2_polarity;
 extern int acpi_disable_cmcff;
+extern bool acpi_int_src_ovr[NR_IRQS_LEGACY];
 
 extern u8 acpi_sci_flags;
 extern u32 acpi_sci_override_gsi;
index cb8ca46..b69b0d7 100644 (file)
@@ -14,7 +14,7 @@
  * Defines x86 CPU feature bits
  */
 #define NCAPINTS                       21         /* N 32-bit words worth of info */
-#define NBUGINTS                       1          /* N 32-bit bug flags */
+#define NBUGINTS                       2          /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
 #define X86_FEATURE_SMBA               (11*32+21) /* "" Slow Memory Bandwidth Allocation */
 #define X86_FEATURE_BMEC               (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
 
+#define X86_FEATURE_SRSO               (11*32+24) /* "" AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS         (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT     (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16                (12*32+ 5) /* AVX512 BFLOAT16 instructions */
 #define X86_FEATURE_AUTOIBRS           (20*32+ 8) /* "" Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR     (20*32+ 9) /* "" SMM_CTL MSR is not present */
 
+#define X86_FEATURE_SBPB               (20*32+27) /* "" Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE                (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO            (20*32+29) /* "" CPU is not affected by SRSO */
+
 /*
  * BUG word(s)
  */
 #define X86_BUG_RETBLEED               X86_BUG(27) /* CPU is affected by RETBleed */
 #define X86_BUG_EIBRS_PBRSB            X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
 #define X86_BUG_SMT_RSB                        X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS                    X86_BUG(30) /* CPU is affected by Gather Data Sampling */
 
+/* BUG word 2 */
+#define X86_BUG_SRSO                   X86_BUG(1*32 + 0) /* AMD SRSO bug */
+#define X86_BUG_DIV0                   X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #endif /* _ASM_X86_CPUFEATURES_H */
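
The NBUGINTS bump from 1 to 2 is what makes room for the new "BUG word 2" entries; assuming the usual X86_BUG() and x86_capability layout from this header, the arithmetic works out as in this sketch:

    /* Assumptions sketched from cpufeatures.h: the per-CPU capability
     * bitmap has (NCAPINTS + NBUGINTS) 32-bit words and bug bit x lives
     * at position NCAPINTS*32 + x.  X86_BUG_SRSO = X86_BUG(1*32 + 0) is
     * bit 0 of the second bug word, which only exists once NBUGINTS is 2. */
    #define NCAPINTS_SKETCH   21
    #define NBUGINTS_SKETCH    2
    #define X86_BUG_SKETCH(x) (NCAPINTS_SKETCH * 32 + (x))

    _Static_assert(X86_BUG_SKETCH(1*32 + 0) / 32 < NCAPINTS_SKETCH + NBUGINTS_SKETCH,
                   "bug word 2 fits in the capability bitmap");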
index a00a53e..1d11135 100644 (file)
@@ -57,6 +57,7 @@
 
 #define MSR_IA32_PRED_CMD              0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB                  BIT(0)     /* Indirect Branch Prediction Barrier */
+#define PRED_CMD_SBPB                  BIT(7)     /* Selective Branch Prediction Barrier */
 
 #define MSR_PPIN_CTL                   0x0000004e
 #define MSR_PPIN                       0x0000004f
                                                 * Not susceptible to Post-Barrier
                                                 * Return Stack Buffer Predictions.
                                                 */
+#define ARCH_CAP_GDS_CTRL              BIT(25) /*
+                                                * CPU is vulnerable to Gather
+                                                * Data Sampling (GDS) and
+                                                * has controls for mitigation.
+                                                */
+#define ARCH_CAP_GDS_NO                        BIT(26) /*
+                                                * CPU is not vulnerable to Gather
+                                                * Data Sampling (GDS).
+                                                */
 
 #define ARCH_CAP_XAPIC_DISABLE         BIT(21) /*
                                                 * IA32_XAPIC_DISABLE_STATUS MSR
 #define RNGDS_MITG_DIS                 BIT(0)  /* SRBDS support */
 #define RTM_ALLOW                      BIT(1)  /* TSX development mode */
 #define FB_CLEAR_DIS                   BIT(3)  /* CPU Fill buffer clear disable */
+#define GDS_MITG_DIS                   BIT(4)  /* Disable GDS mitigation */
+#define GDS_MITG_LOCKED                        BIT(5)  /* GDS mitigation locked */
 
 #define MSR_IA32_SYSENTER_CS           0x00000174
 #define MSR_IA32_SYSENTER_ESP          0x00000175
index 1a65cf4..3faf044 100644 (file)
  * eventually turn into it's own annotation.
  */
 .macro VALIDATE_UNRET_END
-#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY)
+#if defined(CONFIG_NOINSTR_VALIDATION) && \
+       (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
        ANNOTATE_RETPOLINE_SAFE
        nop
 #endif
  */
 .macro UNTRAIN_RET
 #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
-       defined(CONFIG_CALL_DEPTH_TRACKING)
+       defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
        VALIDATE_UNRET_END
        ALTERNATIVE_3 "",                                               \
                      CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,          \
                      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,        \
                      __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
 #endif
+
+#ifdef CONFIG_CPU_SRSO
+       ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \
+                         "call srso_untrain_ret_alias", X86_FEATURE_SRSO_ALIAS
+#endif
 .endm
 
 .macro UNTRAIN_RET_FROM_CALL
                      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,        \
                      __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
 #endif
+
+#ifdef CONFIG_CPU_SRSO
+       ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \
+                         "call srso_untrain_ret_alias", X86_FEATURE_SRSO_ALIAS
+#endif
 .endm
 
 
@@ -332,6 +343,8 @@ extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
 
 extern void __x86_return_thunk(void);
 extern void zen_untrain_ret(void);
+extern void srso_untrain_ret(void);
+extern void srso_untrain_ret_alias(void);
 extern void entry_ibpb(void);
 
 #ifdef CONFIG_CALL_THUNKS
@@ -479,11 +492,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
                : "memory");
 }
 
+extern u64 x86_pred_cmd;
+
 static inline void indirect_branch_prediction_barrier(void)
 {
-       u64 val = PRED_CMD_IBPB;
-
-       alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
+       alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
 }
 
 /* The Intel SPEC CTRL MSR base value cache */
index d46300e..973db04 100644 (file)
@@ -682,9 +682,13 @@ extern u16 get_llc_id(unsigned int cpu);
 #ifdef CONFIG_CPU_SUP_AMD
 extern u32 amd_get_nodes_per_socket(void);
 extern u32 amd_get_highest_perf(void);
+extern bool cpu_has_ibpb_brtype_microcode(void);
+extern void amd_clear_divider(void);
 #else
 static inline u32 amd_get_nodes_per_socket(void)       { return 0; }
 static inline u32 amd_get_highest_perf(void)           { return 0; }
+static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; }
+static inline void amd_clear_divider(void)             { }
 #endif
 
 extern unsigned long arch_align_stack(unsigned long sp);
index 21b542a..53369c5 100644 (file)
@@ -52,6 +52,7 @@ int acpi_lapic;
 int acpi_ioapic;
 int acpi_strict;
 int acpi_disable_cmcff;
+bool acpi_int_src_ovr[NR_IRQS_LEGACY];
 
 /* ACPI SCI override configuration */
 u8 acpi_sci_flags __initdata;
@@ -588,6 +589,9 @@ acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
 
        acpi_table_print_madt_entry(&header->common);
 
+       if (intsrc->source_irq < NR_IRQS_LEGACY)
+               acpi_int_src_ovr[intsrc->source_irq] = true;
+
        if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) {
                acpi_sci_ioapic_setup(intsrc->source_irq,
                                      intsrc->inti_flags & ACPI_MADT_POLARITY_MASK,
index 26ad7ca..b55d8f8 100644 (file)
@@ -75,6 +75,10 @@ static const int amd_zenbleed[] =
                           AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
                           AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
 
+static const int amd_div0[] =
+       AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
+                          AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+
 static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
 {
        int osvw_id = *erratum++;
@@ -1130,6 +1134,11 @@ static void init_amd(struct cpuinfo_x86 *c)
                WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
 
        zenbleed_check(c);
+
+       if (cpu_has_amd_erratum(c, amd_div0)) {
+               pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+               setup_force_cpu_bug(X86_BUG_DIV0);
+       }
 }
 
 #ifdef CONFIG_X86_32
@@ -1290,3 +1299,32 @@ void amd_check_microcode(void)
 {
        on_each_cpu(zenbleed_check_cpu, NULL, 1);
 }
+
+bool cpu_has_ibpb_brtype_microcode(void)
+{
+       switch (boot_cpu_data.x86) {
+       /* Zen1/2 IBPB flushes branch type predictions too. */
+       case 0x17:
+               return boot_cpu_has(X86_FEATURE_AMD_IBPB);
+       case 0x19:
+               /* Poke the MSR bit on Zen3/4 to check its presence. */
+               if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) {
+                       setup_force_cpu_cap(X86_FEATURE_SBPB);
+                       return true;
+               } else {
+                       return false;
+               }
+       default:
+               return false;
+       }
+}
+
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+void noinstr amd_clear_divider(void)
+{
+       asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+                    :: "a" (0), "d" (0), "r" (1));
+}
index 9550744..d02f73c 100644 (file)
@@ -47,6 +47,8 @@ static void __init taa_select_mitigation(void);
 static void __init mmio_select_mitigation(void);
 static void __init srbds_select_mitigation(void);
 static void __init l1d_flush_select_mitigation(void);
+static void __init srso_select_mitigation(void);
+static void __init gds_select_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR without task-specific bits set */
 u64 x86_spec_ctrl_base;
@@ -56,6 +58,9 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
 DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
 
+u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
+EXPORT_SYMBOL_GPL(x86_pred_cmd);
+
 static DEFINE_MUTEX(spec_ctrl_mutex);
 
 /* Update SPEC_CTRL MSR and its cached copy unconditionally */
@@ -160,6 +165,8 @@ void __init cpu_select_mitigations(void)
        md_clear_select_mitigation();
        srbds_select_mitigation();
        l1d_flush_select_mitigation();
+       srso_select_mitigation();
+       gds_select_mitigation();
 }
 
 /*
@@ -646,6 +653,149 @@ static int __init l1d_flush_parse_cmdline(char *str)
 early_param("l1d_flush", l1d_flush_parse_cmdline);
 
 #undef pr_fmt
+#define pr_fmt(fmt)    "GDS: " fmt
+
+enum gds_mitigations {
+       GDS_MITIGATION_OFF,
+       GDS_MITIGATION_UCODE_NEEDED,
+       GDS_MITIGATION_FORCE,
+       GDS_MITIGATION_FULL,
+       GDS_MITIGATION_FULL_LOCKED,
+       GDS_MITIGATION_HYPERVISOR,
+};
+
+#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION)
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE;
+#else
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL;
+#endif
+
+static const char * const gds_strings[] = {
+       [GDS_MITIGATION_OFF]            = "Vulnerable",
+       [GDS_MITIGATION_UCODE_NEEDED]   = "Vulnerable: No microcode",
+       [GDS_MITIGATION_FORCE]          = "Mitigation: AVX disabled, no microcode",
+       [GDS_MITIGATION_FULL]           = "Mitigation: Microcode",
+       [GDS_MITIGATION_FULL_LOCKED]    = "Mitigation: Microcode (locked)",
+       [GDS_MITIGATION_HYPERVISOR]     = "Unknown: Dependent on hypervisor status",
+};
+
+bool gds_ucode_mitigated(void)
+{
+       return (gds_mitigation == GDS_MITIGATION_FULL ||
+               gds_mitigation == GDS_MITIGATION_FULL_LOCKED);
+}
+EXPORT_SYMBOL_GPL(gds_ucode_mitigated);
+
+void update_gds_msr(void)
+{
+       u64 mcu_ctrl_after;
+       u64 mcu_ctrl;
+
+       switch (gds_mitigation) {
+       case GDS_MITIGATION_OFF:
+               rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+               mcu_ctrl |= GDS_MITG_DIS;
+               break;
+       case GDS_MITIGATION_FULL_LOCKED:
+               /*
+                * The LOCKED state comes from the boot CPU. APs might not have
+                * the same state. Make sure the mitigation is enabled on all
+                * CPUs.
+                */
+       case GDS_MITIGATION_FULL:
+               rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+               mcu_ctrl &= ~GDS_MITG_DIS;
+               break;
+       case GDS_MITIGATION_FORCE:
+       case GDS_MITIGATION_UCODE_NEEDED:
+       case GDS_MITIGATION_HYPERVISOR:
+               return;
+       };
+
+       wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+
+       /*
+        * Check to make sure that the WRMSR value was not ignored. Writes to
+        * GDS_MITG_DIS will be ignored if this processor is locked but the boot
+        * processor was not.
+        */
+       rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after);
+       WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after);
+}
+
+static void __init gds_select_mitigation(void)
+{
+       u64 mcu_ctrl;
+
+       if (!boot_cpu_has_bug(X86_BUG_GDS))
+               return;
+
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+               gds_mitigation = GDS_MITIGATION_HYPERVISOR;
+               goto out;
+       }
+
+       if (cpu_mitigations_off())
+               gds_mitigation = GDS_MITIGATION_OFF;
+       /* Will verify below that mitigation _can_ be disabled */
+
+       /* No microcode */
+       if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+               if (gds_mitigation == GDS_MITIGATION_FORCE) {
+                       /*
+                        * This only needs to be done on the boot CPU so do it
+                        * here rather than in update_gds_msr()
+                        */
+                       setup_clear_cpu_cap(X86_FEATURE_AVX);
+                       pr_warn("Microcode update needed! Disabling AVX as mitigation.\n");
+               } else {
+                       gds_mitigation = GDS_MITIGATION_UCODE_NEEDED;
+               }
+               goto out;
+       }
+
+       /* Microcode has mitigation, use it */
+       if (gds_mitigation == GDS_MITIGATION_FORCE)
+               gds_mitigation = GDS_MITIGATION_FULL;
+
+       rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+       if (mcu_ctrl & GDS_MITG_LOCKED) {
+               if (gds_mitigation == GDS_MITIGATION_OFF)
+                       pr_warn("Mitigation locked. Disable failed.\n");
+
+               /*
+                * The mitigation is selected from the boot CPU. All other CPUs
+                * _should_ have the same state. If the boot CPU isn't locked
+                * but others are then update_gds_msr() will WARN() of the state
+                * mismatch. If the boot CPU is locked update_gds_msr() will
+                * ensure the other CPUs have the mitigation enabled.
+                */
+               gds_mitigation = GDS_MITIGATION_FULL_LOCKED;
+       }
+
+       update_gds_msr();
+out:
+       pr_info("%s\n", gds_strings[gds_mitigation]);
+}
+
+static int __init gds_parse_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (!boot_cpu_has_bug(X86_BUG_GDS))
+               return 0;
+
+       if (!strcmp(str, "off"))
+               gds_mitigation = GDS_MITIGATION_OFF;
+       else if (!strcmp(str, "force"))
+               gds_mitigation = GDS_MITIGATION_FORCE;
+
+       return 0;
+}
+early_param("gather_data_sampling", gds_parse_cmdline);
+
+#undef pr_fmt
 #define pr_fmt(fmt)     "Spectre V1 : " fmt
 
 enum spectre_v1_mitigation {
@@ -2188,6 +2338,165 @@ static int __init l1tf_cmdline(char *str)
 early_param("l1tf", l1tf_cmdline);
 
 #undef pr_fmt
+#define pr_fmt(fmt)    "Speculative Return Stack Overflow: " fmt
+
+enum srso_mitigation {
+       SRSO_MITIGATION_NONE,
+       SRSO_MITIGATION_MICROCODE,
+       SRSO_MITIGATION_SAFE_RET,
+       SRSO_MITIGATION_IBPB,
+       SRSO_MITIGATION_IBPB_ON_VMEXIT,
+};
+
+enum srso_mitigation_cmd {
+       SRSO_CMD_OFF,
+       SRSO_CMD_MICROCODE,
+       SRSO_CMD_SAFE_RET,
+       SRSO_CMD_IBPB,
+       SRSO_CMD_IBPB_ON_VMEXIT,
+};
+
+static const char * const srso_strings[] = {
+       [SRSO_MITIGATION_NONE]           = "Vulnerable",
+       [SRSO_MITIGATION_MICROCODE]      = "Mitigation: microcode",
+       [SRSO_MITIGATION_SAFE_RET]       = "Mitigation: safe RET",
+       [SRSO_MITIGATION_IBPB]           = "Mitigation: IBPB",
+       [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only"
+};
+
+static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE;
+static enum srso_mitigation_cmd srso_cmd __ro_after_init = SRSO_CMD_SAFE_RET;
+
+static int __init srso_parse_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (!strcmp(str, "off"))
+               srso_cmd = SRSO_CMD_OFF;
+       else if (!strcmp(str, "microcode"))
+               srso_cmd = SRSO_CMD_MICROCODE;
+       else if (!strcmp(str, "safe-ret"))
+               srso_cmd = SRSO_CMD_SAFE_RET;
+       else if (!strcmp(str, "ibpb"))
+               srso_cmd = SRSO_CMD_IBPB;
+       else if (!strcmp(str, "ibpb-vmexit"))
+               srso_cmd = SRSO_CMD_IBPB_ON_VMEXIT;
+       else
+               pr_err("Ignoring unknown SRSO option (%s).", str);
+
+       return 0;
+}
+early_param("spec_rstack_overflow", srso_parse_cmdline);
+
+#define SRSO_NOTICE "WARNING: See https://kernel.org/doc/html/latest/admin-guide/hw-vuln/srso.html for mitigation options."
+
+static void __init srso_select_mitigation(void)
+{
+       bool has_microcode;
+
+       if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off())
+               goto pred_cmd;
+
+       /*
+        * The first check is for the kernel running as a guest in order
+        * for guests to verify whether IBPB is a viable mitigation.
+        */
+       has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode();
+       if (!has_microcode) {
+               pr_warn("IBPB-extending microcode not applied!\n");
+               pr_warn(SRSO_NOTICE);
+       } else {
+               /*
+                * Enable the synthetic (even if in a real CPUID leaf)
+                * flags for guests.
+                */
+               setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
+
+               /*
+                * Zen1/2 with SMT off aren't vulnerable after the right
+                * IBPB microcode has been applied.
+                */
+               if ((boot_cpu_data.x86 < 0x19) &&
+                   (!cpu_smt_possible() || (cpu_smt_control == CPU_SMT_DISABLED)))
+                       setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
+       }
+
+       if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
+               if (has_microcode) {
+                       pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n");
+                       srso_mitigation = SRSO_MITIGATION_IBPB;
+                       goto pred_cmd;
+               }
+       }
+
+       switch (srso_cmd) {
+       case SRSO_CMD_OFF:
+               return;
+
+       case SRSO_CMD_MICROCODE:
+               if (has_microcode) {
+                       srso_mitigation = SRSO_MITIGATION_MICROCODE;
+                       pr_warn(SRSO_NOTICE);
+               }
+               break;
+
+       case SRSO_CMD_SAFE_RET:
+               if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+                       /*
+                        * Enable the return thunk for generated code
+                        * like ftrace, static_call, etc.
+                        */
+                       setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+
+                       if (boot_cpu_data.x86 == 0x19)
+                               setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
+                       else
+                               setup_force_cpu_cap(X86_FEATURE_SRSO);
+                       srso_mitigation = SRSO_MITIGATION_SAFE_RET;
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+                       goto pred_cmd;
+               }
+               break;
+
+       case SRSO_CMD_IBPB:
+               if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+                       if (has_microcode) {
+                               setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+                               srso_mitigation = SRSO_MITIGATION_IBPB;
+                       }
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+                       goto pred_cmd;
+               }
+               break;
+
+       case SRSO_CMD_IBPB_ON_VMEXIT:
+               if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+                       if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+                               setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+                               srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
+                       }
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+                       goto pred_cmd;
+                }
+               break;
+
+       default:
+               break;
+       }
+
+       pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode"));
+
+pred_cmd:
+       if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) &&
+            boot_cpu_has(X86_FEATURE_SBPB))
+               x86_pred_cmd = PRED_CMD_SBPB;
+}
+
+#undef pr_fmt
 #define pr_fmt(fmt) fmt
 
 #ifdef CONFIG_SYSFS
@@ -2385,6 +2694,18 @@ static ssize_t retbleed_show_state(char *buf)
        return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
 }
 
+static ssize_t srso_show_state(char *buf)
+{
+       return sysfs_emit(buf, "%s%s\n",
+                         srso_strings[srso_mitigation],
+                         (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode"));
+}
+
+static ssize_t gds_show_state(char *buf)
+{
+       return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
+}
+
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
                               char *buf, unsigned int bug)
 {
@@ -2434,6 +2755,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
        case X86_BUG_RETBLEED:
                return retbleed_show_state(buf);
 
+       case X86_BUG_SRSO:
+               return srso_show_state(buf);
+
+       case X86_BUG_GDS:
+               return gds_show_state(buf);
+
        default:
                break;
        }
@@ -2498,4 +2825,14 @@ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, cha
 {
        return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
 }
+
+ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_SRSO);
+}
+
+ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_GDS);
+}
 #endif
index 0ba1067..e3a65e9 100644 (file)
@@ -1250,6 +1250,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 #define RETBLEED       BIT(3)
 /* CPU is affected by SMT (cross-thread) return predictions */
 #define SMT_RSB                BIT(4)
+/* CPU is affected by SRSO */
+#define SRSO           BIT(5)
+/* CPU is affected by GDS */
+#define GDS            BIT(6)
 
 static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
        VULNBL_INTEL_STEPPINGS(IVYBRIDGE,       X86_STEPPING_ANY,               SRBDS),
@@ -1262,27 +1266,30 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
        VULNBL_INTEL_STEPPINGS(BROADWELL_X,     X86_STEPPING_ANY,               MMIO),
        VULNBL_INTEL_STEPPINGS(BROADWELL,       X86_STEPPING_ANY,               SRBDS),
        VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       X86_STEPPING_ANY,               MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       X86_STEPPING_ANY,               MMIO | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED | GDS),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(CANNONLAKE_L,    X86_STEPPING_ANY,               RETBLEED),
-       VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO),
-       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO),
-       VULNBL_INTEL_STEPPINGS(COMETLAKE,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED | GDS),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO | GDS),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO | GDS),
+       VULNBL_INTEL_STEPPINGS(COMETLAKE,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x0, 0x0),        MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED | GDS),
+       VULNBL_INTEL_STEPPINGS(TIGERLAKE_L,     X86_STEPPING_ANY,               GDS),
+       VULNBL_INTEL_STEPPINGS(TIGERLAKE,       X86_STEPPING_ANY,               GDS),
        VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPING_ANY,               MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPING_ANY,               MMIO | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
        VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,  X86_STEPPING_ANY,               MMIO),
        VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
 
        VULNBL_AMD(0x15, RETBLEED),
        VULNBL_AMD(0x16, RETBLEED),
-       VULNBL_AMD(0x17, RETBLEED | SMT_RSB),
+       VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
        VULNBL_HYGON(0x18, RETBLEED | SMT_RSB),
+       VULNBL_AMD(0x19, SRSO),
        {}
 };
 
@@ -1406,6 +1413,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
        if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
                setup_force_cpu_bug(X86_BUG_SMT_RSB);
 
+       if (!cpu_has(c, X86_FEATURE_SRSO_NO)) {
+               if (cpu_matches(cpu_vuln_blacklist, SRSO))
+                       setup_force_cpu_bug(X86_BUG_SRSO);
+       }
+
+       /*
+        * Check if CPU is vulnerable to GDS. If running in a virtual machine on
+        * an affected processor, the VMM may have disabled the use of GATHER by
+        * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
+        * which means that AVX will be disabled.
+        */
+       if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+           boot_cpu_has(X86_FEATURE_AVX))
+               setup_force_cpu_bug(X86_BUG_GDS);
+
        if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
                return;
 
@@ -1962,6 +1984,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
        validate_apic_and_package_id(c);
        x86_spec_ctrl_setup_ap();
        update_srbds_msr();
+       if (boot_cpu_has_bug(X86_BUG_GDS))
+               update_gds_msr();
 
        tsx_ap_init();
 }
index 1c44630..1dcd7d4 100644 (file)
@@ -83,6 +83,7 @@ void cpu_select_mitigations(void);
 
 extern void x86_spec_ctrl_setup_ap(void);
 extern void update_srbds_msr(void);
+extern void update_gds_msr(void);
 
 extern enum spectre_v2_mitigation spectre_v2_enabled;
 
index 4a817d2..1885326 100644 (file)
@@ -206,6 +206,8 @@ DEFINE_IDTENTRY(exc_divide_error)
 {
        do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
                      FPE_INTDIV, error_get_trap_addr(regs));
+
+       amd_clear_divider();
 }
 
 DEFINE_IDTENTRY(exc_overflow)
index 03c885d..e768132 100644 (file)
@@ -134,13 +134,27 @@ SECTIONS
                SOFTIRQENTRY_TEXT
 #ifdef CONFIG_RETPOLINE
                __indirect_thunk_start = .;
-               *(.text.__x86.*)
+               *(.text.__x86.indirect_thunk)
+               *(.text.__x86.return_thunk)
                __indirect_thunk_end = .;
 #endif
                STATIC_CALL_TEXT
 
                ALIGN_ENTRY_TEXT_BEGIN
+#ifdef CONFIG_CPU_SRSO
+               *(.text.__x86.rethunk_untrain)
+#endif
+
                ENTRY_TEXT
+
+#ifdef CONFIG_CPU_SRSO
+               /*
+                * See the comment above srso_untrain_ret_alias()'s
+                * definition.
+                */
+               . = srso_untrain_ret_alias | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
+               *(.text.__x86.rethunk_safe)
+#endif
                ALIGN_ENTRY_TEXT_END
                *(.gnu.warning)
 
@@ -509,7 +523,18 @@ INIT_PER_CPU(irq_stack_backing_store);
 #endif
 
 #ifdef CONFIG_RETHUNK
-. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned");
+. = ASSERT((__ret & 0x3f) == 0, "__ret not cacheline-aligned");
+. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
+#endif
+
+#ifdef CONFIG_CPU_SRSO
+/*
+ * GNU ld cannot do XOR so do: (A | B) - (A & B) in order to compute the XOR
+ * of the two function addresses:
+ */
+. = ASSERT(((srso_untrain_ret_alias | srso_safe_ret_alias) -
+               (srso_untrain_ret_alias & srso_safe_ret_alias)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
+               "SRSO function pair won't alias");
 #endif
 
 #endif /* CONFIG_X86_64 */
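
The linker-script assertion above leans on the identity spelled out in its comment: for any a and b, (a | b) - (a & b) == a ^ b, because OR sets every bit present in either operand, AND keeps the bits present in both, and subtracting the latter leaves exactly the differing bits. A minimal stand-alone check of that identity (the address below is made up for illustration):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t a = 0xffffffff81a00000ULL;  /* made-up example address */
            uint64_t b = a ^ ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20));

            /* (a | b) - (a & b) reproduces a ^ b, which here is exactly
             * the bit pattern the SRSO alias assertion demands. */
            assert(((a | b) - (a & b)) == (a ^ b));
            assert((a ^ b) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)));
            return 0;
    }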
index 7f4d133..d343268 100644 (file)
@@ -729,6 +729,9 @@ void kvm_set_cpu_caps(void)
                F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */
        );
 
+       if (cpu_feature_enabled(X86_FEATURE_SRSO_NO))
+               kvm_cpu_cap_set(X86_FEATURE_SRSO_NO);
+
        kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
                F(PERFMON_V2)
        );
index 07756b7..d3aec1f 100644 (file)
@@ -2417,15 +2417,18 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
         */
        memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
 
-       vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
+       BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap));
+       memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap));
 
-       svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb);
 
-       if (ghcb_xcr0_is_valid(ghcb)) {
+       svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb);
+
+       if (kvm_ghcb_xcr0_is_valid(svm)) {
                vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
                kvm_update_cpuid_runtime(vcpu);
        }
@@ -2436,84 +2439,88 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
        control->exit_code_hi = upper_32_bits(exit_code);
        control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
        control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
+       svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb);
 
        /* Clear the valid entries fields */
        memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
+static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+{
+       return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 {
-       struct kvm_vcpu *vcpu;
-       struct ghcb *ghcb;
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
        u64 exit_code;
        u64 reason;
 
-       ghcb = svm->sev_es.ghcb;
-
        /*
         * Retrieve the exit code now even though it may not be marked valid
         * as it could help with debugging.
         */
-       exit_code = ghcb_get_sw_exit_code(ghcb);
+       exit_code = kvm_ghcb_get_sw_exit_code(control);
 
        /* Only GHCB Usage code 0 is supported */
-       if (ghcb->ghcb_usage) {
+       if (svm->sev_es.ghcb->ghcb_usage) {
                reason = GHCB_ERR_INVALID_USAGE;
                goto vmgexit_err;
        }
 
        reason = GHCB_ERR_MISSING_INPUT;
 
-       if (!ghcb_sw_exit_code_is_valid(ghcb) ||
-           !ghcb_sw_exit_info_1_is_valid(ghcb) ||
-           !ghcb_sw_exit_info_2_is_valid(ghcb))
+       if (!kvm_ghcb_sw_exit_code_is_valid(svm) ||
+           !kvm_ghcb_sw_exit_info_1_is_valid(svm) ||
+           !kvm_ghcb_sw_exit_info_2_is_valid(svm))
                goto vmgexit_err;
 
-       switch (ghcb_get_sw_exit_code(ghcb)) {
+       switch (exit_code) {
        case SVM_EXIT_READ_DR7:
                break;
        case SVM_EXIT_WRITE_DR7:
-               if (!ghcb_rax_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_RDTSC:
                break;
        case SVM_EXIT_RDPMC:
-               if (!ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_CPUID:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
-               if (ghcb_get_rax(ghcb) == 0xd)
-                       if (!ghcb_xcr0_is_valid(ghcb))
+               if (vcpu->arch.regs[VCPU_REGS_RAX] == 0xd)
+                       if (!kvm_ghcb_xcr0_is_valid(svm))
                                goto vmgexit_err;
                break;
        case SVM_EXIT_INVD:
                break;
        case SVM_EXIT_IOIO:
-               if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
-                       if (!ghcb_sw_scratch_is_valid(ghcb))
+               if (control->exit_info_1 & SVM_IOIO_STR_MASK) {
+                       if (!kvm_ghcb_sw_scratch_is_valid(svm))
                                goto vmgexit_err;
                } else {
-                       if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
-                               if (!ghcb_rax_is_valid(ghcb))
+                       if (!(control->exit_info_1 & SVM_IOIO_TYPE_MASK))
+                               if (!kvm_ghcb_rax_is_valid(svm))
                                        goto vmgexit_err;
                }
                break;
        case SVM_EXIT_MSR:
-               if (!ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
-               if (ghcb_get_sw_exit_info_1(ghcb)) {
-                       if (!ghcb_rax_is_valid(ghcb) ||
-                           !ghcb_rdx_is_valid(ghcb))
+               if (control->exit_info_1) {
+                       if (!kvm_ghcb_rax_is_valid(svm) ||
+                           !kvm_ghcb_rdx_is_valid(svm))
                                goto vmgexit_err;
                }
                break;
        case SVM_EXIT_VMMCALL:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_cpl_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_cpl_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_RDTSCP:
@@ -2521,19 +2528,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
        case SVM_EXIT_WBINVD:
                break;
        case SVM_EXIT_MONITOR:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_rcx_is_valid(ghcb) ||
-                   !ghcb_rdx_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_rcx_is_valid(svm) ||
+                   !kvm_ghcb_rdx_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_MWAIT:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_VMGEXIT_MMIO_READ:
        case SVM_VMGEXIT_MMIO_WRITE:
-               if (!ghcb_sw_scratch_is_valid(ghcb))
+               if (!kvm_ghcb_sw_scratch_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_VMGEXIT_NMI_COMPLETE:
@@ -2549,11 +2556,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
        return 0;
 
 vmgexit_err:
-       vcpu = &svm->vcpu;
-
        if (reason == GHCB_ERR_INVALID_USAGE) {
                vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
-                           ghcb->ghcb_usage);
+                           svm->sev_es.ghcb->ghcb_usage);
        } else if (reason == GHCB_ERR_INVALID_EVENT) {
                vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
                            exit_code);
@@ -2563,11 +2568,8 @@ vmgexit_err:
                dump_ghcb(svm);
        }
 
-       /* Clear the valid entries fields */
-       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
-
-       ghcb_set_sw_exit_info_1(ghcb, 2);
-       ghcb_set_sw_exit_info_2(ghcb, reason);
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, reason);
 
        /* Resume the guest to "return" the error code. */
        return 1;
@@ -2586,7 +2588,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
                 */
                if (svm->sev_es.ghcb_sa_sync) {
                        kvm_write_guest(svm->vcpu.kvm,
-                                       ghcb_get_sw_scratch(svm->sev_es.ghcb),
+                                       svm->sev_es.sw_scratch,
                                        svm->sev_es.ghcb_sa,
                                        svm->sev_es.ghcb_sa_len);
                        svm->sev_es.ghcb_sa_sync = false;
@@ -2632,12 +2634,11 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
 static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
 {
        struct vmcb_control_area *control = &svm->vmcb->control;
-       struct ghcb *ghcb = svm->sev_es.ghcb;
        u64 ghcb_scratch_beg, ghcb_scratch_end;
        u64 scratch_gpa_beg, scratch_gpa_end;
        void *scratch_va;
 
-       scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+       scratch_gpa_beg = svm->sev_es.sw_scratch;
        if (!scratch_gpa_beg) {
                pr_err("vmgexit: scratch gpa not provided\n");
                goto e_scratch;
@@ -2708,8 +2709,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
        return 0;
 
 e_scratch:
-       ghcb_set_sw_exit_info_1(ghcb, 2);
-       ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
 
        return 1;
 }
@@ -2822,7 +2823,6 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb_control_area *control = &svm->vmcb->control;
        u64 ghcb_gpa, exit_code;
-       struct ghcb *ghcb;
        int ret;
 
        /* Validate the GHCB */
@@ -2847,20 +2847,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
        }
 
        svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
-       ghcb = svm->sev_es.ghcb_map.hva;
 
-       trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
-
-       exit_code = ghcb_get_sw_exit_code(ghcb);
+       trace_kvm_vmgexit_enter(vcpu->vcpu_id, svm->sev_es.ghcb);
 
+       sev_es_sync_from_ghcb(svm);
        ret = sev_es_validate_vmgexit(svm);
        if (ret)
                return ret;
 
-       sev_es_sync_from_ghcb(svm);
-       ghcb_set_sw_exit_info_1(ghcb, 0);
-       ghcb_set_sw_exit_info_2(ghcb, 0);
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 0);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 0);
 
+       exit_code = kvm_ghcb_get_sw_exit_code(control);
        switch (exit_code) {
        case SVM_VMGEXIT_MMIO_READ:
                ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
@@ -2898,13 +2896,13 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
                        break;
                case 1:
                        /* Get AP jump table address */
-                       ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+                       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, sev->ap_jump_table);
                        break;
                default:
                        pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
                               control->exit_info_1);
-                       ghcb_set_sw_exit_info_1(ghcb, 2);
-                       ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
+                       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+                       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
                }
 
                ret = 1;
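
The reshuffle in this file, with sev_es_sync_from_ghcb() now running before sev_es_validate_vmgexit() and the validity and exit-code reads going through svm->sev_es instead of the mapped GHCB, reads as a snapshot-then-validate pattern on guest-shared memory. A minimal sketch of that pattern follows; the struct layout and field names are purely illustrative, not the actual KVM structures.

#include <stdint.h>

/* Illustrative only: validate and dispatch on a private snapshot so a guest
 * rewriting the shared page concurrently cannot change what was checked. */
struct shared_ghcb { volatile uint64_t exit_code, exit_info_1, sw_scratch; };
struct ghcb_snapshot { uint64_t exit_code, exit_info_1, sw_scratch; };

static void snapshot_ghcb(struct ghcb_snapshot *s, const struct shared_ghcb *g)
{
	s->exit_code   = g->exit_code;	/* each shared field is read once */
	s->exit_info_1 = g->exit_info_1;
	s->sw_scratch  = g->sw_scratch;
}

static int handle_vmgexit(const struct ghcb_snapshot *s)
{
	if (!s->sw_scratch)		/* validation sees ...            */
		return -1;
	switch (s->exit_code) {		/* ... exactly what dispatch sees */
	default:
		return 0;
	}
}
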
index 956726d..03e852d 100644 (file)
@@ -1498,7 +1498,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        if (sd->current_vmcb != svm->vmcb) {
                sd->current_vmcb = svm->vmcb;
-               indirect_branch_prediction_barrier();
+
+               if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT))
+                       indirect_branch_prediction_barrier();
        }
        if (kvm_vcpu_apicv_active(vcpu))
                avic_vcpu_load(vcpu, cpu);
index 18af7e7..8239c8d 100644 (file)
@@ -190,10 +190,12 @@ struct vcpu_sev_es_state {
        /* SEV-ES support */
        struct sev_es_save_area *vmsa;
        struct ghcb *ghcb;
+       u8 valid_bitmap[16];
        struct kvm_host_map ghcb_map;
        bool received_first_sipi;
 
        /* SEV-ES scratch area support */
+       u64 sw_scratch;
        void *ghcb_sa;
        u32 ghcb_sa_len;
        bool ghcb_sa_sync;
@@ -744,4 +746,28 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
 void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
 
+#define DEFINE_KVM_GHCB_ACCESSORS(field)                                               \
+       static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \
+       {                                                                       \
+               return test_bit(GHCB_BITMAP_IDX(field),                         \
+                               (unsigned long *)&svm->sev_es.valid_bitmap);    \
+       }                                                                       \
+                                                                               \
+       static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm, struct ghcb *ghcb) \
+       {                                                                       \
+               return kvm_ghcb_##field##_is_valid(svm) ? ghcb->save.field : 0; \
+       }                                                                       \
+
+DEFINE_KVM_GHCB_ACCESSORS(cpl)
+DEFINE_KVM_GHCB_ACCESSORS(rax)
+DEFINE_KVM_GHCB_ACCESSORS(rcx)
+DEFINE_KVM_GHCB_ACCESSORS(rdx)
+DEFINE_KVM_GHCB_ACCESSORS(rbx)
+DEFINE_KVM_GHCB_ACCESSORS(rsi)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_code)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2)
+DEFINE_KVM_GHCB_ACCESSORS(sw_scratch)
+DEFINE_KVM_GHCB_ACCESSORS(xcr0)
+
 #endif
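
For reference, DEFINE_KVM_GHCB_ACCESSORS(rax) above expands to roughly the following; the point is that the validity test consults KVM's cached valid_bitmap rather than the guest-shared GHCB page.

static __always_inline bool kvm_ghcb_rax_is_valid(const struct vcpu_svm *svm)
{
	return test_bit(GHCB_BITMAP_IDX(rax),
			(unsigned long *)&svm->sev_es.valid_bitmap);
}

static __always_inline u64 kvm_ghcb_get_rax_if_valid(struct vcpu_svm *svm,
						     struct ghcb *ghcb)
{
	return kvm_ghcb_rax_is_valid(svm) ? ghcb->save.rax : 0;
}
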
index 8e8295e..265452f 100644 (file)
@@ -224,6 +224,9 @@ SYM_FUNC_START(__svm_vcpu_run)
         */
        UNTRAIN_RET
 
+       /* SRSO */
+       ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT
+
        /*
         * Clear all general purpose registers except RSP and RAX to prevent
         * speculative use of the guest's values, even those that are reloaded
index 278dbd3..19d9ff9 100644 (file)
@@ -314,6 +314,8 @@ u64 __read_mostly host_xcr0;
 
 static struct kmem_cache *x86_emulator_cache;
 
+extern bool gds_ucode_mitigated(void);
+
 /*
  * When called, it means the previous get/set msr reached an invalid msr.
  * Return true if we want to ignore/silent this failed msr access.
@@ -1616,7 +1618,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
         ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
         ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
         ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
-        ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+        ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
 
 static u64 kvm_get_arch_capabilities(void)
 {
@@ -1673,6 +1675,9 @@ static u64 kvm_get_arch_capabilities(void)
                 */
        }
 
+       if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated())
+               data |= ARCH_CAP_GDS_NO;
+
        return data;
 }
 
index 3fd066d..2cff585 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/unwind_hints.h>
 #include <asm/percpu.h>
 #include <asm/frame.h>
+#include <asm/nops.h>
 
        .section .text.__x86.indirect_thunk
 
@@ -131,6 +132,46 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
  */
 #ifdef CONFIG_RETHUNK
 
+/*
+ * srso_untrain_ret_alias() and srso_safe_ret_alias() are placed at
+ * special addresses:
+ *
+ * - srso_untrain_ret_alias() is 2M aligned
+ * - srso_safe_ret_alias() is also in the same 2M page but bits 2, 8, 14
+ * and 20 in its virtual address are set (while those bits in the
+ * srso_untrain_ret_alias() function are cleared).
+ *
+ * This guarantees that those two addresses will alias in the branch
+ * target buffer of Zen3/4 generations, leading to any potential
+ * poisoned entries at that BTB slot to get evicted.
+ *
+ * As a result, srso_safe_ret_alias() becomes a safe return.
+ */
+#ifdef CONFIG_CPU_SRSO
+       .section .text.__x86.rethunk_untrain
+
+SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
+       ANNOTATE_NOENDBR
+       ASM_NOP2
+       lfence
+       jmp __x86_return_thunk
+SYM_FUNC_END(srso_untrain_ret_alias)
+__EXPORT_THUNK(srso_untrain_ret_alias)
+
+       .section .text.__x86.rethunk_safe
+#endif
+
+/* Needs a definition for the __x86_return_thunk alternative below. */
+SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
+#ifdef CONFIG_CPU_SRSO
+       add $8, %_ASM_SP
+       UNWIND_HINT_FUNC
+#endif
+       ANNOTATE_UNRET_SAFE
+       ret
+       int3
+SYM_FUNC_END(srso_safe_ret_alias)
+
        .section .text.__x86.return_thunk
 
 /*
@@ -143,7 +184,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
  *    from re-poisioning the BTB prediction.
  */
        .align 64
-       .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc
+       .skip 64 - (__ret - zen_untrain_ret), 0xcc
 SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
        ANNOTATE_NOENDBR
        /*
@@ -175,10 +216,10 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
         * evicted, __x86_return_thunk will suffer Straight Line Speculation
         * which will be contained safely by the INT3.
         */
-SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+SYM_INNER_LABEL(__ret, SYM_L_GLOBAL)
        ret
        int3
-SYM_CODE_END(__x86_return_thunk)
+SYM_CODE_END(__ret)
 
        /*
         * Ensure the TEST decoding / BTB invalidation is complete.
@@ -189,11 +230,45 @@ SYM_CODE_END(__x86_return_thunk)
         * Jump back and execute the RET in the middle of the TEST instruction.
         * INT3 is for SLS protection.
         */
-       jmp __x86_return_thunk
+       jmp __ret
        int3
 SYM_FUNC_END(zen_untrain_ret)
 __EXPORT_THUNK(zen_untrain_ret)
 
+/*
+ * SRSO untraining sequence for Zen1/2, similar to zen_untrain_ret()
+ * above. On kernel entry, srso_untrain_ret() is executed which is a
+ *
+ * movabs $0xccccccc308c48348,%rax
+ *
+ * and when the return thunk executes the inner label srso_safe_ret()
+ * later, it is a stack manipulation and a RET which is mispredicted and
+ * thus a "safe" one to use.
+ */
+       .align 64
+       .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
+SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       ANNOTATE_NOENDBR
+       .byte 0x48, 0xb8
+
+SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+       add $8, %_ASM_SP
+       ret
+       int3
+       int3
+       int3
+       lfence
+       call srso_safe_ret
+       int3
+SYM_CODE_END(srso_safe_ret)
+SYM_FUNC_END(srso_untrain_ret)
+__EXPORT_THUNK(srso_untrain_ret)
+
+SYM_FUNC_START(__x86_return_thunk)
+       ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \
+                       "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS
+       int3
+SYM_CODE_END(__x86_return_thunk)
 EXPORT_SYMBOL(__x86_return_thunk)
 
 #endif /* CONFIG_RETHUNK */
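
The aliasing constraint described in the comment block above (same 2M page, with bits 2, 8, 14 and 20 set in one symbol and clear in the other) can be stated as a simple address check. The sketch below only restates that comment; the real placement is enforced by the kernel linker script, which is not shown in this hunk.

#include <assert.h>
#include <stdint.h>

#define SRSO_ALIAS_BITS	((1UL << 2) | (1UL << 8) | (1UL << 14) | (1UL << 20))
#define PMD_MASK_2M	(~((1UL << 21) - 1))

/* Illustrative restatement of the layout the comment describes. */
static void check_srso_alias(uintptr_t untrain_alias, uintptr_t safe_alias)
{
	assert((untrain_alias & PMD_MASK_2M) == (safe_alias & PMD_MASK_2M));
	assert((untrain_alias & SRSO_ALIAS_BITS) == 0);
	assert((safe_alias & SRSO_ALIAS_BITS) == SRSO_ALIAS_BITS);
}
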
index 90de500..9866468 100644 (file)
@@ -722,14 +722,9 @@ void submit_bio_noacct(struct bio *bio)
        struct block_device *bdev = bio->bi_bdev;
        struct request_queue *q = bdev_get_queue(bdev);
        blk_status_t status = BLK_STS_IOERR;
-       struct blk_plug *plug;
 
        might_sleep();
 
-       plug = blk_mq_plug(bio);
-       if (plug && plug->nowait)
-               bio->bi_opf |= REQ_NOWAIT;
-
        /*
         * For a REQ_NOWAIT based request, return -EOPNOTSUPP
         * if queue does not support NOWAIT.
@@ -1059,7 +1054,6 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
        plug->rq_count = 0;
        plug->multiple_queues = false;
        plug->has_elevator = false;
-       plug->nowait = false;
        INIT_LIST_HEAD(&plug->cb_list);
 
        /*
index dd64e20..089fcb9 100644 (file)
@@ -3301,11 +3301,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
        if (qos[QOS_MIN] > qos[QOS_MAX])
                goto einval;
 
-       if (enable) {
+       if (enable && !ioc->enabled) {
                blk_stat_enable_accounting(disk->queue);
                blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
                ioc->enabled = true;
-       } else {
+       } else if (!enable && ioc->enabled) {
+               blk_stat_disable_accounting(disk->queue);
                blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
                ioc->enabled = false;
        }
index a286bf3..838ffad 100644 (file)
@@ -358,13 +358,14 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
                task_io_account_write(bio->bi_iter.bi_size);
        }
 
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               bio->bi_opf |= REQ_NOWAIT;
+
        if (iocb->ki_flags & IOCB_HIPRI) {
-               bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
+               bio->bi_opf |= REQ_POLLED;
                submit_bio(bio);
                WRITE_ONCE(iocb->private, bio);
        } else {
-               if (iocb->ki_flags & IOCB_NOWAIT)
-                       bio->bi_opf |= REQ_NOWAIT;
                submit_bio(bio);
        }
        return -EIOCBQUEUED;
index 52b339a..9967fcf 100644 (file)
@@ -173,6 +173,9 @@ static void internal_free_pages_locked(struct ivpu_bo *bo)
 {
        unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
 
+       if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
+               set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
+
        for (i = 0; i < npages; i++)
                put_page(bo->pages[i]);
 
@@ -587,6 +590,11 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
        if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
                drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
 
+       if (bo->flags & DRM_IVPU_BO_WC)
+               set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT);
+       else if (bo->flags & DRM_IVPU_BO_UNCACHED)
+               set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT);
+
        prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
        bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
        if (!bo->kvaddr) {
index 1dd8d5a..a4d9f14 100644 (file)
@@ -470,6 +470,45 @@ static const struct dmi_system_id asus_laptop[] = {
        { }
 };
 
+static const struct dmi_system_id tongfang_gm_rg[] = {
+       {
+               .ident = "TongFang GMxRGxx/XMG CORE 15 (M22)/TUXEDO Stellaris 15 Gen4 AMD",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"),
+               },
+       },
+       { }
+};
+
+static const struct dmi_system_id maingear_laptop[] = {
+       {
+               .ident = "MAINGEAR Vector Pro 2 15",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-15A3070T"),
+               }
+       },
+       {
+               .ident = "MAINGEAR Vector Pro 2 17",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-17A3070T"),
+               },
+       },
+       { }
+};
+
+static const struct dmi_system_id pcspecialist_laptop[] = {
+       {
+               .ident = "PCSpecialist Elimina Pro 16 M",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Elimina Pro 16 M"),
+               },
+       },
+       { }
+};
+
 static const struct dmi_system_id lg_laptop[] = {
        {
                .ident = "LG Electronics 17U70P",
@@ -493,6 +532,9 @@ struct irq_override_cmp {
 static const struct irq_override_cmp override_table[] = {
        { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
        { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
+       { tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
+       { maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
+       { pcspecialist_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
        { lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
 };
 
@@ -512,6 +554,28 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity,
                        return entry->override;
        }
 
+#ifdef CONFIG_X86
+       /*
+        * Always use the MADT override info, except for the i8042 PS/2 ctrl
+        * IRQs (1 and 12). For these the DSDT IRQ settings should sometimes
+        * be used otherwise PS/2 keyboards / mice will not work.
+        */
+       if (gsi != 1 && gsi != 12)
+               return true;
+
+       /* If the override comes from an INT_SRC_OVR MADT entry, honor it. */
+       if (acpi_int_src_ovr[gsi])
+               return true;
+
+       /*
+        * IRQ override isn't needed on modern AMD Zen systems and
+        * this override breaks active low IRQs on AMD Ryzen 6000 and
+        * newer systems. Skip it.
+        */
+       if (boot_cpu_has(X86_FEATURE_ZEN))
+               return false;
+#endif
+
        return true;
 }
 
index c1815b9..52df435 100644 (file)
@@ -577,6 +577,18 @@ ssize_t __weak cpu_show_retbleed(struct device *dev,
        return sysfs_emit(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev,
+                                            struct device_attribute *attr, char *buf)
+{
+       return sysfs_emit(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_gds(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       return sysfs_emit(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
@@ -588,6 +600,8 @@ static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
 static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
 static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
 static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
+static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL);
+static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
        &dev_attr_meltdown.attr,
@@ -601,6 +615,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
        &dev_attr_srbds.attr,
        &dev_attr_mmio_stale_data.attr,
        &dev_attr_retbleed.attr,
+       &dev_attr_spec_rstack_overflow.attr,
+       &dev_attr_gather_data_sampling.attr,
        NULL
 };
 
index 5676e6d..06673c6 100644 (file)
@@ -1870,15 +1870,16 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio)
 
 static void zram_bio_read(struct zram *zram, struct bio *bio)
 {
-       struct bvec_iter iter;
-       struct bio_vec bv;
-       unsigned long start_time;
+       unsigned long start_time = bio_start_io_acct(bio);
+       struct bvec_iter iter = bio->bi_iter;
 
-       start_time = bio_start_io_acct(bio);
-       bio_for_each_segment(bv, bio, iter) {
+       do {
                u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
                u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
                                SECTOR_SHIFT;
+               struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+               bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
                if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
                        atomic64_inc(&zram->stats.failed_reads);
@@ -1890,22 +1891,26 @@ static void zram_bio_read(struct zram *zram, struct bio *bio)
                zram_slot_lock(zram, index);
                zram_accessed(zram, index);
                zram_slot_unlock(zram, index);
-       }
+
+               bio_advance_iter_single(bio, &iter, bv.bv_len);
+       } while (iter.bi_size);
+
        bio_end_io_acct(bio, start_time);
        bio_endio(bio);
 }
 
 static void zram_bio_write(struct zram *zram, struct bio *bio)
 {
-       struct bvec_iter iter;
-       struct bio_vec bv;
-       unsigned long start_time;
+       unsigned long start_time = bio_start_io_acct(bio);
+       struct bvec_iter iter = bio->bi_iter;
 
-       start_time = bio_start_io_acct(bio);
-       bio_for_each_segment(bv, bio, iter) {
+       do {
                u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
                u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
                                SECTOR_SHIFT;
+               struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+               bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
                if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
                        atomic64_inc(&zram->stats.failed_writes);
@@ -1916,7 +1921,10 @@ static void zram_bio_write(struct zram *zram, struct bio *bio)
                zram_slot_lock(zram, index);
                zram_accessed(zram, index);
                zram_slot_unlock(zram, index);
-       }
+
+               bio_advance_iter_single(bio, &iter, bv.bv_len);
+       } while (iter.bi_size);
+
        bio_end_io_acct(bio, start_time);
        bio_endio(bio);
 }
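
Both loops above clamp bv_len so one iteration never crosses a zram page: a bio segment that straddles a page boundary is processed in two passes, with bio_advance_iter_single() moving the iterator by the clamped amount each time. A standalone sketch of the clamping arithmetic, assuming 4 KiB pages for the example:

#include <stdint.h>

#define EXAMPLE_PAGE_SIZE 4096u	/* stand-in for PAGE_SIZE */

/* Bytes handled in one pass: whatever is left of the segment, limited to
 * what is left of the current page. */
static uint32_t bytes_this_pass(uint32_t offset_in_page, uint32_t seg_len)
{
	uint32_t room = EXAMPLE_PAGE_SIZE - offset_in_page;

	return seg_len < room ? seg_len : room;
}
/* e.g. offset 3072, segment 2048 -> 1024 bytes now; the remaining 1024 are
 * picked up on the next iteration against the following page. */
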
index cf5499e..ea6b401 100644 (file)
@@ -510,70 +510,6 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip)
        return 0;
 }
 
-/*
- * Some AMD fTPM versions may cause stutter
- * https://www.amd.com/en/support/kb/faq/pa-410
- *
- * Fixes are available in two series of fTPM firmware:
- * 6.x.y.z series: 6.0.18.6 +
- * 3.x.y.z series: 3.57.y.5 +
- */
-#ifdef CONFIG_X86
-static bool tpm_amd_is_rng_defective(struct tpm_chip *chip)
-{
-       u32 val1, val2;
-       u64 version;
-       int ret;
-
-       if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
-               return false;
-
-       ret = tpm_request_locality(chip);
-       if (ret)
-               return false;
-
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val1, NULL);
-       if (ret)
-               goto release;
-       if (val1 != 0x414D4400U /* AMD */) {
-               ret = -ENODEV;
-               goto release;
-       }
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_1, &val1, NULL);
-       if (ret)
-               goto release;
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_2, &val2, NULL);
-
-release:
-       tpm_relinquish_locality(chip);
-
-       if (ret)
-               return false;
-
-       version = ((u64)val1 << 32) | val2;
-       if ((version >> 48) == 6) {
-               if (version >= 0x0006000000180006ULL)
-                       return false;
-       } else if ((version >> 48) == 3) {
-               if (version >= 0x0003005700000005ULL)
-                       return false;
-       } else {
-               return false;
-       }
-
-       dev_warn(&chip->dev,
-                "AMD fTPM version 0x%llx causes system stutter; hwrng disabled\n",
-                version);
-
-       return true;
-}
-#else
-static inline bool tpm_amd_is_rng_defective(struct tpm_chip *chip)
-{
-       return false;
-}
-#endif /* CONFIG_X86 */
-
 static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 {
        struct tpm_chip *chip = container_of(rng, struct tpm_chip, hwrng);
@@ -585,10 +521,20 @@ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
        return tpm_get_random(chip, data, max);
 }
 
+static bool tpm_is_hwrng_enabled(struct tpm_chip *chip)
+{
+       if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM))
+               return false;
+       if (tpm_is_firmware_upgrade(chip))
+               return false;
+       if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED)
+               return false;
+       return true;
+}
+
 static int tpm_add_hwrng(struct tpm_chip *chip)
 {
-       if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip) ||
-           tpm_amd_is_rng_defective(chip))
+       if (!tpm_is_hwrng_enabled(chip))
                return 0;
 
        snprintf(chip->hwrng_name, sizeof(chip->hwrng_name),
@@ -693,7 +639,7 @@ int tpm_chip_register(struct tpm_chip *chip)
        return 0;
 
 out_hwrng:
-       if (IS_ENABLED(CONFIG_HW_RANDOM_TPM) && !tpm_is_firmware_upgrade(chip))
+       if (tpm_is_hwrng_enabled(chip))
                hwrng_unregister(&chip->hwrng);
 out_ppi:
        tpm_bios_log_teardown(chip);
@@ -718,8 +664,7 @@ EXPORT_SYMBOL_GPL(tpm_chip_register);
 void tpm_chip_unregister(struct tpm_chip *chip)
 {
        tpm_del_legacy_sysfs(chip);
-       if (IS_ENABLED(CONFIG_HW_RANDOM_TPM) && !tpm_is_firmware_upgrade(chip) &&
-           !tpm_amd_is_rng_defective(chip))
+       if (tpm_is_hwrng_enabled(chip))
                hwrng_unregister(&chip->hwrng);
        tpm_bios_log_teardown(chip);
        if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip))
index 1a5d09b..9eb1a18 100644 (file)
@@ -463,6 +463,28 @@ static bool crb_req_canceled(struct tpm_chip *chip, u8 status)
        return (cancel & CRB_CANCEL_INVOKE) == CRB_CANCEL_INVOKE;
 }
 
+static int crb_check_flags(struct tpm_chip *chip)
+{
+       u32 val;
+       int ret;
+
+       ret = crb_request_locality(chip, 0);
+       if (ret)
+               return ret;
+
+       ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val, NULL);
+       if (ret)
+               goto release;
+
+       if (val == 0x414D4400U /* AMD */)
+               chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
+
+release:
+       crb_relinquish_locality(chip, 0);
+
+       return ret;
+}
+
 static const struct tpm_class_ops tpm_crb = {
        .flags = TPM_OPS_AUTO_STARTUP,
        .status = crb_status,
@@ -800,6 +822,14 @@ static int crb_acpi_add(struct acpi_device *device)
        chip->acpi_dev_handle = device->handle;
        chip->flags = TPM_CHIP_FLAG_TPM2;
 
+       rc = tpm_chip_bootstrap(chip);
+       if (rc)
+               goto out;
+
+       rc = crb_check_flags(chip);
+       if (rc)
+               goto out;
+
        rc = tpm_chip_register(chip);
 
 out:
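
The magic constant in crb_check_flags() is the TPM manufacturer property, a vendor string packed into a u32; decoded byte by byte from the most significant byte down, 0x414D4400 is simply "AMD" plus a trailing NUL:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t id = 0x414D4400u;	/* TPM2_PT_MANUFACTURER value checked above */

	printf("%c%c%c\n", (int)(id >> 24), (int)((id >> 16) & 0xff),
	       (int)((id >> 8) & 0xff));	/* prints "AMD" */
	return 0;
}
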
index cc42cf3..ac4daaf 100644 (file)
@@ -164,6 +164,22 @@ static const struct dmi_system_id tpm_tis_dmi_table[] = {
        },
        {
                .callback = tpm_tis_disable_irq,
+               .ident = "ThinkStation P620",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkStation P620"),
+               },
+       },
+       {
+               .callback = tpm_tis_disable_irq,
+               .ident = "TUXEDO InfinityBook S 15/17 Gen7",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "TUXEDO InfinityBook S 15/17 Gen7"),
+               },
+       },
+       {
+               .callback = tpm_tis_disable_irq,
                .ident = "UPX-TGL",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
index 81fba0d..9a1e194 100644 (file)
@@ -1012,8 +1012,8 @@ static int amd_pstate_update_status(const char *buf, size_t size)
        return 0;
 }
 
-static ssize_t show_status(struct kobject *kobj,
-                          struct kobj_attribute *attr, char *buf)
+static ssize_t status_show(struct device *dev,
+                          struct device_attribute *attr, char *buf)
 {
        ssize_t ret;
 
@@ -1024,7 +1024,7 @@ static ssize_t show_status(struct kobject *kobj,
        return ret;
 }
 
-static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
+static ssize_t status_store(struct device *a, struct device_attribute *b,
                            const char *buf, size_t count)
 {
        char *p = memchr(buf, '\n', count);
@@ -1043,7 +1043,7 @@ cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
 cpufreq_freq_attr_ro(amd_pstate_highest_perf);
 cpufreq_freq_attr_rw(energy_performance_preference);
 cpufreq_freq_attr_ro(energy_performance_available_preferences);
-define_one_global_rw(status);
+static DEVICE_ATTR_RW(status);
 
 static struct freq_attr *amd_pstate_attr[] = {
        &amd_pstate_max_freq,
@@ -1062,7 +1062,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
 };
 
 static struct attribute *pstate_global_attributes[] = {
-       &status.attr,
+       &dev_attr_status.attr,
        NULL
 };
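
The show/store callbacks above are renamed because DEVICE_ATTR_RW() derives the callback names from the attribute name. Roughly, the macro line in the hunk unfolds as below; the stub bodies exist only to make the sketch compile on its own.

#include <linux/device.h>

static ssize_t status_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	return 0;	/* stub for the sketch */
}

static ssize_t status_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t count)
{
	return count;	/* stub for the sketch */
}

/* DEVICE_ATTR_RW(status) then unfolds, roughly, to: */
static struct device_attribute dev_attr_status =
	__ATTR(status, 0644, status_show, status_store);
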
 
index c2d6d9c..b88af12 100644 (file)
@@ -120,20 +120,6 @@ static void psci_pd_remove(void)
        }
 }
 
-static bool psci_pd_try_set_osi_mode(void)
-{
-       int ret;
-
-       if (!psci_has_osi_support())
-               return false;
-
-       ret = psci_set_osi_mode(true);
-       if (ret)
-               return false;
-
-       return true;
-}
-
 static void psci_cpuidle_domain_sync_state(struct device *dev)
 {
        /*
@@ -152,15 +138,12 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct device_node *node;
-       bool use_osi;
+       bool use_osi = psci_has_osi_support();
        int ret = 0, pd_count = 0;
 
        if (!np)
                return -ENODEV;
 
-       /* If OSI mode is supported, let's try to enable it. */
-       use_osi = psci_pd_try_set_osi_mode();
-
        /*
         * Parse child nodes for the "#power-domain-cells" property and
         * initialize a genpd/genpd-of-provider pair when it's found.
@@ -170,33 +153,37 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev)
                        continue;
 
                ret = psci_pd_init(node, use_osi);
-               if (ret)
-                       goto put_node;
+               if (ret) {
+                       of_node_put(node);
+                       goto exit;
+               }
 
                pd_count++;
        }
 
        /* Bail out if not using the hierarchical CPU topology. */
        if (!pd_count)
-               goto no_pd;
+               return 0;
 
        /* Link genpd masters/subdomains to model the CPU topology. */
        ret = dt_idle_pd_init_topology(np);
        if (ret)
                goto remove_pd;
 
+       /* let's try to enable OSI. */
+       ret = psci_set_osi_mode(use_osi);
+       if (ret)
+               goto remove_pd;
+
        pr_info("Initialized CPU PM domain topology using %s mode\n",
                use_osi ? "OSI" : "PC");
        return 0;
 
-put_node:
-       of_node_put(node);
 remove_pd:
+       dt_idle_pd_remove_topology(np);
        psci_pd_remove();
+exit:
        pr_err("failed to create CPU PM domains ret=%d\n", ret);
-no_pd:
-       if (use_osi)
-               psci_set_osi_mode(false);
        return ret;
 }
 
index b371655..1af63c1 100644 (file)
@@ -152,6 +152,30 @@ int dt_idle_pd_init_topology(struct device_node *np)
        return 0;
 }
 
+int dt_idle_pd_remove_topology(struct device_node *np)
+{
+       struct device_node *node;
+       struct of_phandle_args child, parent;
+       int ret;
+
+       for_each_child_of_node(np, node) {
+               if (of_parse_phandle_with_args(node, "power-domains",
+                                       "#power-domain-cells", 0, &parent))
+                       continue;
+
+               child.np = node;
+               child.args_count = 0;
+               ret = of_genpd_remove_subdomain(&parent, &child);
+               of_node_put(parent.np);
+               if (ret) {
+                       of_node_put(node);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 struct device *dt_idle_attach_cpu(int cpu, const char *name)
 {
        struct device *dev;
index a95483d..3be1f70 100644 (file)
@@ -14,6 +14,8 @@ struct generic_pm_domain *dt_idle_pd_alloc(struct device_node *np,
 
 int dt_idle_pd_init_topology(struct device_node *np);
 
+int dt_idle_pd_remove_topology(struct device_node *np);
+
 struct device *dt_idle_attach_cpu(int cpu, const char *name);
 
 void dt_idle_detach_cpu(struct device *dev);
@@ -36,6 +38,11 @@ static inline int dt_idle_pd_init_topology(struct device_node *np)
        return 0;
 }
 
+static inline int dt_idle_pd_remove_topology(struct device_node *np)
+{
+       return 0;
+}
+
 static inline struct device *dt_idle_attach_cpu(int cpu, const char *name)
 {
        return NULL;
index 644c188..08fdd0e 100644 (file)
@@ -211,6 +211,7 @@ config FSL_DMA
 config FSL_EDMA
        tristate "Freescale eDMA engine support"
        depends on OF
+       depends on HAS_IOMEM
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
@@ -280,6 +281,7 @@ config IMX_SDMA
 
 config INTEL_IDMA64
        tristate "Intel integrated DMA 64-bit support"
+       depends on HAS_IOMEM
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
index 5abbcc6..9a15f0d 100644 (file)
@@ -384,9 +384,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
        wq->threshold = 0;
        wq->priority = 0;
        wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
-       clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
-       clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
-       clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+       wq->flags = 0;
        memset(wq->name, 0, WQ_NAME_SIZE);
        wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
        idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
index ebd8733..9413fad 100644 (file)
@@ -190,7 +190,13 @@ static int mcf_edma_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       chans = pdata->dma_channels;
+       if (!pdata->dma_channels) {
+               dev_info(&pdev->dev, "setting default channel number to 64");
+               chans = 64;
+       } else {
+               chans = pdata->dma_channels;
+       }
+
        len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
        mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
        if (!mcf_edma)
@@ -202,11 +208,6 @@ static int mcf_edma_probe(struct platform_device *pdev)
        mcf_edma->drvdata = &mcf_data;
        mcf_edma->big_endian = 1;
 
-       if (!mcf_edma->n_chans) {
-               dev_info(&pdev->dev, "setting default channel number to 64");
-               mcf_edma->n_chans = 64;
-       }
-
        mutex_init(&mcf_edma->fsl_edma_mutex);
 
        mcf_edma->membase = devm_platform_ioremap_resource(pdev, 0);
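
Moving the default ahead of the allocation matters because chans feeds the devm_kzalloc() size: with the old ordering a zero pdata->dma_channels produced a buffer sized for zero channels, and the default of 64 was only applied afterwards. A standalone sketch of the sizing dependency, with made-up struct sizes:

#include <stddef.h>
#include <stdio.h>

struct fake_edma_engine { char hdr[64]; };
struct fake_edma_chan   { char regs[128]; };

static size_t edma_alloc_size(unsigned int chans)
{
	return sizeof(struct fake_edma_engine) +
	       sizeof(struct fake_edma_chan) * chans;
}

int main(void)
{
	/* old order: allocate for 0 channels, then use 64 of them */
	printf("sized for 0: %zu bytes, needed for 64: %zu bytes\n",
	       edma_alloc_size(0), edma_alloc_size(64));
	return 0;
}
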
index 95a462a..b6e0ac8 100644 (file)
@@ -192,7 +192,7 @@ struct owl_dma_pchan {
 };
 
 /**
- * struct owl_dma_pchan - Wrapper for DMA ENGINE channel
+ * struct owl_dma_vchan - Wrapper for DMA ENGINE channel
  * @vc: wrapped virtual channel
  * @pchan: the physical channel utilized by this channel
  * @txd: active transaction on this channel
index b4731fe..3cf0b38 100644 (file)
@@ -404,6 +404,12 @@ enum desc_status {
         */
        BUSY,
        /*
+        * Pause was called while descriptor was BUSY. Due to hardware
+        * limitations, only termination is possible for descriptors
+        * that have been paused.
+        */
+       PAUSED,
+       /*
         * Sitting on the channel work_list but xfer done
         * by PL330 core
         */
@@ -2041,7 +2047,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
        list_for_each_entry(desc, &pch->work_list, node) {
 
                /* If already submitted */
-               if (desc->status == BUSY)
+               if (desc->status == BUSY || desc->status == PAUSED)
                        continue;
 
                ret = pl330_submit_req(pch->thread, desc);
@@ -2326,6 +2332,7 @@ static int pl330_pause(struct dma_chan *chan)
 {
        struct dma_pl330_chan *pch = to_pchan(chan);
        struct pl330_dmac *pl330 = pch->dmac;
+       struct dma_pl330_desc *desc;
        unsigned long flags;
 
        pm_runtime_get_sync(pl330->ddma.dev);
@@ -2335,6 +2342,10 @@ static int pl330_pause(struct dma_chan *chan)
        _stop(pch->thread);
        spin_unlock(&pl330->lock);
 
+       list_for_each_entry(desc, &pch->work_list, node) {
+               if (desc->status == BUSY)
+                       desc->status = PAUSED;
+       }
        spin_unlock_irqrestore(&pch->lock, flags);
        pm_runtime_mark_last_busy(pl330->ddma.dev);
        pm_runtime_put_autosuspend(pl330->ddma.dev);
@@ -2425,7 +2436,7 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                else if (running && desc == running)
                        transferred =
                                pl330_get_current_xferred_count(pch, desc);
-               else if (desc->status == BUSY)
+               else if (desc->status == BUSY || desc->status == PAUSED)
                        /*
                         * Busy but not running means either just enqueued,
                         * or finished and not yet marked done
@@ -2442,6 +2453,9 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                        case DONE:
                                ret = DMA_COMPLETE;
                                break;
+                       case PAUSED:
+                               ret = DMA_PAUSED;
+                               break;
                        case PREP:
                        case BUSY:
                                ret = DMA_IN_PROGRESS;
index 93ee298..e0bfd12 100644 (file)
@@ -668,6 +668,8 @@ static int xdma_set_vector_reg(struct xdma_device *xdev, u32 vec_tbl_start,
                        val |= irq_start << shift;
                        irq_start++;
                        irq_num--;
+                       if (!irq_num)
+                               break;
                }
 
                /* write IRQ register */
@@ -715,7 +717,7 @@ static int xdma_irq_init(struct xdma_device *xdev)
                ret = request_irq(irq, xdma_channel_isr, 0,
                                  "xdma-c2h-channel", &xdev->c2h_chans[j]);
                if (ret) {
-                       xdma_err(xdev, "H2C channel%d request irq%d failed: %d",
+                       xdma_err(xdev, "C2H channel%d request irq%d failed: %d",
                                 j, irq, ret);
                        goto failed_init_c2h;
                }
@@ -892,7 +894,7 @@ static int xdma_probe(struct platform_device *pdev)
        }
 
        reg_base = devm_ioremap_resource(&pdev->dev, res);
-       if (!reg_base) {
+       if (IS_ERR(reg_base)) {
                xdma_err(xdev, "ioremap failed");
                goto failed;
        }
index a3b86b8..6dc950c 100644 (file)
@@ -1296,6 +1296,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 int amdgpu_device_pci_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
+bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
 bool amdgpu_device_pcie_dynamic_switching_supported(void);
 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
 bool amdgpu_device_aspm_support_quirk(void);
index 040f4cb..fb78a8f 100644 (file)
@@ -295,7 +295,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
 
        if (!p->gang_size) {
                ret = -EINVAL;
-               goto free_partial_kdata;
+               goto free_all_kdata;
        }
 
        for (i = 0; i < p->gang_size; ++i) {
index a2cdde0..45e9d73 100644 (file)
@@ -1459,6 +1459,32 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
 }
 
 /*
+ * On APUs with >= 64GB white flickering has been observed w/ SG enabled.
+ * Disable S/G on such systems until we have a proper fix.
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
+ */
+bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
+{
+       switch (amdgpu_sg_display) {
+       case -1:
+               break;
+       case 0:
+               return false;
+       case 1:
+               return true;
+       default:
+               return false;
+       }
+       if ((totalram_pages() << (PAGE_SHIFT - 10)) +
+           (adev->gmc.real_vram_size / 1024) >= 64000000) {
+               DRM_WARN("Disabling S/G due to >=64GB RAM\n");
+               return false;
+       }
+       return true;
+}
+
+/*
  * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
  * speed switching. Until we have confirmation from Intel that a specific host
  * supports it, it's safer that we keep it disabled for all.
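
The threshold check in amdgpu_sg_display_supported() adds system RAM and VRAM in KiB: totalram_pages() << (PAGE_SHIFT - 10) converts pages to KiB, real_vram_size / 1024 converts bytes to KiB, and 64000000 acts as the rounded 64 GB cutoff. A standalone sketch of the same arithmetic, assuming 4 KiB pages:

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12	/* 4 KiB pages assumed for the sketch */

static int over_64gb_cutoff(uint64_t nr_ram_pages, uint64_t vram_bytes)
{
	uint64_t ram_kib  = nr_ram_pages << (EXAMPLE_PAGE_SHIFT - 10);
	uint64_t vram_kib = vram_bytes / 1024;

	return (ram_kib + vram_kib) >= 64000000ULL;
}

int main(void)
{
	/* 60 GiB of RAM plus 8 GiB of VRAM trips the cutoff ... */
	printf("%d\n", over_64gb_cutoff(60ULL << 18, 8ULL << 30));
	/* ... 32 GiB plus 4 GiB does not. */
	printf("%d\n", over_64gb_cutoff(32ULL << 18, 4ULL << 30));
	return 0;
}
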
index 3a7af59..0451533 100644 (file)
@@ -471,8 +471,12 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
        case IP_VERSION(11, 0, 3):
                if ((adev->gfx.me_fw_version >= 1505) &&
                    (adev->gfx.pfp_fw_version >= 1600) &&
-                   (adev->gfx.mec_fw_version >= 512))
-                       adev->gfx.cp_gfx_shadow = true;
+                   (adev->gfx.mec_fw_version >= 512)) {
+                       if (amdgpu_sriov_vf(adev))
+                               adev->gfx.cp_gfx_shadow = true;
+                       else
+                               adev->gfx.cp_gfx_shadow = false;
+               }
                break;
        default:
                adev->gfx.cp_gfx_shadow = false;
index e1a392b..af5685f 100644 (file)
@@ -137,14 +137,15 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
        int ret;
        int retry_loop;
 
+       /* Wait for bootloader to signify that it is ready having bit 31 of
+        * C2PMSG_35 set to 1. All other bits are expected to be cleared.
+        * If there is an error in processing command, bits[7:0] will be set.
+        * This is applicable for PSP v13.0.6 and newer.
+        */
        for (retry_loop = 0; retry_loop < 10; retry_loop++) {
-               /* Wait for bootloader to signify that is
-                   ready having bit 31 of C2PMSG_35 set to 1 */
-               ret = psp_wait_for(psp,
-                                  SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
-                                  0x80000000,
-                                  0x80000000,
-                                  false);
+               ret = psp_wait_for(
+                       psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+                       0x80000000, 0xffffffff, false);
 
                if (ret == 0)
                        return 0;
index 49f40d9..f5a6f56 100644 (file)
@@ -1543,11 +1543,7 @@ static bool kfd_ignore_crat(void)
        if (ignore_crat)
                return true;
 
-#ifndef KFD_SUPPORT_IOMMU_V2
        ret = true;
-#else
-       ret = false;
-#endif
 
        return ret;
 }
index 0b3dc75..a53e075 100644 (file)
@@ -194,11 +194,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
 
                kfd_device_info_set_event_interrupt_class(kfd);
 
-               /* Raven */
-               if (gc_version == IP_VERSION(9, 1, 0) ||
-                   gc_version == IP_VERSION(9, 2, 2))
-                       kfd->device_info.needs_iommu_device = true;
-
                if (gc_version < IP_VERSION(11, 0, 0)) {
                        /* Navi2x+, Navi1x+ */
                        if (gc_version == IP_VERSION(10, 3, 6))
@@ -233,10 +228,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
                    asic_type != CHIP_TONGA)
                        kfd->device_info.supports_cwsr = true;
 
-               if (asic_type == CHIP_KAVERI ||
-                   asic_type == CHIP_CARRIZO)
-                       kfd->device_info.needs_iommu_device = true;
-
                if (asic_type != CHIP_HAWAII && !vf)
                        kfd->device_info.needs_pci_atomics = true;
        }
@@ -249,7 +240,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
        uint32_t gfx_target_version = 0;
 
        switch (adev->asic_type) {
-#ifdef KFD_SUPPORT_IOMMU_V2
 #ifdef CONFIG_DRM_AMDGPU_CIK
        case CHIP_KAVERI:
                gfx_target_version = 70000;
@@ -262,7 +252,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                if (!vf)
                        f2g = &gfx_v8_kfd2kgd;
                break;
-#endif
 #ifdef CONFIG_DRM_AMDGPU_CIK
        case CHIP_HAWAII:
                gfx_target_version = 70001;
@@ -298,7 +287,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                        gfx_target_version = 90000;
                        f2g = &gfx_v9_kfd2kgd;
                        break;
-#ifdef KFD_SUPPORT_IOMMU_V2
                /* Raven */
                case IP_VERSION(9, 1, 0):
                case IP_VERSION(9, 2, 2):
@@ -306,7 +294,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                        if (!vf)
                                f2g = &gfx_v9_kfd2kgd;
                        break;
-#endif
                /* Vega12 */
                case IP_VERSION(9, 2, 1):
                        gfx_target_version = 90004;
index 2df1538..01192f5 100644 (file)
@@ -2538,18 +2538,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
        }
 
        switch (dev->adev->asic_type) {
-       case CHIP_CARRIZO:
-               device_queue_manager_init_vi(&dqm->asic_ops);
-               break;
-
        case CHIP_KAVERI:
-               device_queue_manager_init_cik(&dqm->asic_ops);
-               break;
-
        case CHIP_HAWAII:
                device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
                break;
 
+       case CHIP_CARRIZO:
        case CHIP_TONGA:
        case CHIP_FIJI:
        case CHIP_POLARIS10:
index 0fa739f..e5554a3 100644 (file)
@@ -1638,9 +1638,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
                }
                break;
        }
-       if (init_data.flags.gpu_vm_support &&
-           (amdgpu_sg_display == 0))
-               init_data.flags.gpu_vm_support = false;
+       if (init_data.flags.gpu_vm_support)
+               init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev);
 
        if (init_data.flags.gpu_vm_support)
                adev->mode_info.gpu_vm_support = true;
index 9bc86de..b885c39 100644 (file)
@@ -1320,7 +1320,7 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
                if (computed_streams[i])
                        continue;
 
-               if (!res_pool->funcs->remove_stream_from_ctx ||
+               if (res_pool->funcs->remove_stream_from_ctx &&
                    res_pool->funcs->remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK)
                        return -EINVAL;
 
index 20d4d08..6966420 100644 (file)
@@ -777,7 +777,8 @@ void dce110_edp_wait_for_hpd_ready(
        dal_gpio_destroy_irq(&hpd);
 
        /* ensure that the panel is detected */
-       ASSERT(edp_hpd_high);
+       if (!edp_hpd_high)
+               DC_LOG_DC("%s: wait timed out!\n", __func__);
 }
 
 void dce110_edp_power_control(
index e5b7ef7..50dc834 100644 (file)
@@ -357,8 +357,11 @@ void dpp3_set_cursor_attributes(
        int cur_rom_en = 0;
 
        if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA ||
-               color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA)
-               cur_rom_en = 1;
+               color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) {
+               if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) {
+                       cur_rom_en = 1;
+               }
+       }
 
        REG_UPDATE_3(CURSOR0_CONTROL,
                        CUR0_MODE, color_format,
index ce41a83..222af2f 100644 (file)
@@ -1581,9 +1581,9 @@ static int smu_disable_dpms(struct smu_context *smu)
 
        /*
         * For SMU 13.0.4/11, PMFW will handle the features disablement properly
-        * for gpu reset case. Driver involvement is unnecessary.
+        * for gpu reset and S0i3 cases. Driver involvement is unnecessary.
         */
-       if (amdgpu_in_reset(adev)) {
+       if (amdgpu_in_reset(adev) || adev->in_s0ix) {
                switch (adev->ip_versions[MP1_HWIP][0]) {
                case IP_VERSION(13, 0, 4):
                case IP_VERSION(13, 0, 11):
index 3d18861..fddcd83 100644 (file)
@@ -331,11 +331,13 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
        struct smu_13_0_0_powerplay_table *powerplay_table =
                table_context->power_play_table;
        struct smu_baco_context *smu_baco = &smu->smu_baco;
+#if 0
        PPTable_t *pptable = smu->smu_table.driver_pptable;
        const OverDriveLimits_t * const overdrive_upperlimits =
                                &pptable->SkuTable.OverDriveLimitsBasicMax;
        const OverDriveLimits_t * const overdrive_lowerlimits =
                                &pptable->SkuTable.OverDriveLimitsMin;
+#endif
 
        if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC)
                smu->dc_controlled_by_gpio = true;
@@ -347,18 +349,27 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
        if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO)
                smu_baco->maco_support = true;
 
+       /*
+        * We are in the transition to a new OD mechanism.
+        * Disable the OD feature support for SMU13 temporarily.
+        * TODO: get this reverted when new OD mechanism online
+        */
+#if 0
        if (!overdrive_lowerlimits->FeatureCtrlMask ||
            !overdrive_upperlimits->FeatureCtrlMask)
                smu->od_enabled = false;
 
-       table_context->thermal_controller_type =
-               powerplay_table->thermal_controller_type;
-
        /*
         * Instead of having its own buffer space and get overdrive_table copied,
         * smu->od_settings just points to the actual overdrive_table
         */
        smu->od_settings = &powerplay_table->overdrive_table;
+#else
+       smu->od_enabled = false;
+#endif
+
+       table_context->thermal_controller_type =
+               powerplay_table->thermal_controller_type;
 
        return 0;
 }
@@ -1140,7 +1151,6 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
                (OverDriveTableExternal_t *)smu->smu_table.overdrive_table;
        struct smu_13_0_dpm_table *single_dpm_table;
        struct smu_13_0_pcie_table *pcie_table;
-       const int link_width[] = {0, 1, 2, 4, 8, 12, 16};
        uint32_t gen_speed, lane_width;
        int i, curr_freq, size = 0;
        int32_t min_value, max_value;
@@ -1256,7 +1266,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
                                        (pcie_table->pcie_lane[i] == 6) ? "x16" : "",
                                        pcie_table->clk_freq[i],
                                        (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
-                                       (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ?
+                                       (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ?
                                        "*" : "");
                break;
 
index 1ac5521..fe4ee2d 100644 (file)
@@ -1993,9 +1993,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 
        gpu_metrics->average_socket_power =
                SMUQ10_TO_UINT(metrics->SocketPower);
-       /* Energy is reported in 15.625mJ units */
-       gpu_metrics->energy_accumulator =
-               SMUQ10_TO_UINT(metrics->SocketEnergyAcc);
+       /* Energy counter reported in 15.259uJ (2^-16) units */
+       gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
 
        gpu_metrics->current_gfxclk =
                SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc0]);
index b1f0937..62f2886 100644 (file)
@@ -323,10 +323,12 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
        struct smu_baco_context *smu_baco = &smu->smu_baco;
        PPTable_t *smc_pptable = table_context->driver_pptable;
        BoardTable_t *BoardTable = &smc_pptable->BoardTable;
+#if 0
        const OverDriveLimits_t * const overdrive_upperlimits =
                                &smc_pptable->SkuTable.OverDriveLimitsBasicMax;
        const OverDriveLimits_t * const overdrive_lowerlimits =
                                &smc_pptable->SkuTable.OverDriveLimitsMin;
+#endif
 
        if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC)
                smu->dc_controlled_by_gpio = true;
@@ -338,18 +340,22 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
        if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled))
                smu_baco->maco_support = true;
 
+#if 0
        if (!overdrive_lowerlimits->FeatureCtrlMask ||
            !overdrive_upperlimits->FeatureCtrlMask)
                smu->od_enabled = false;
 
-       table_context->thermal_controller_type =
-               powerplay_table->thermal_controller_type;
-
        /*
         * Instead of having its own buffer space and get overdrive_table copied,
         * smu->od_settings just points to the actual overdrive_table
         */
        smu->od_settings = &powerplay_table->overdrive_table;
+#else
+       smu->od_enabled = false;
+#endif
+
+       table_context->thermal_controller_type =
+               powerplay_table->thermal_controller_type;
 
        return 0;
 }
index 504d51c..aadb396 100644 (file)
@@ -2517,9 +2517,11 @@ static irqreturn_t it6505_int_threaded_handler(int unused, void *data)
        };
        int int_status[3], i;
 
-       if (it6505->enable_drv_hold || pm_runtime_get_if_in_use(dev) <= 0)
+       if (it6505->enable_drv_hold || !it6505->powered)
                return IRQ_HANDLED;
 
+       pm_runtime_get_sync(dev);
+
        int_status[0] = it6505_read(it6505, INT_STATUS_01);
        int_status[1] = it6505_read(it6505, INT_STATUS_02);
        int_status[2] = it6505_read(it6505, INT_STATUS_03);
index 5163e52..9663601 100644 (file)
@@ -774,9 +774,7 @@ static struct mipi_dsi_device *lt9611_attach_dsi(struct lt9611 *lt9611,
        dsi->lanes = 4;
        dsi->format = MIPI_DSI_FMT_RGB888;
        dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
-                         MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO_NO_HSA |
-                         MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP |
-                         MIPI_DSI_MODE_NO_EOT_PACKET;
+                         MIPI_DSI_MODE_VIDEO_HSE;
 
        ret = devm_mipi_dsi_attach(dev, dsi);
        if (ret < 0) {
index 4ea6507..baaf0e0 100644 (file)
@@ -623,7 +623,13 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
        int ret;
 
        if (obj->import_attach) {
+               /* Reset both vm_ops and vm_private_data, so we don't end up with
+                * vm_ops pointing to our implementation if the dma-buf backend
+                * doesn't set those fields.
+                */
                vma->vm_private_data = NULL;
+               vma->vm_ops = NULL;
+
                ret = dma_buf_mmap(obj->dma_buf, vma, 0);
 
                /* Drop the reference drm_gem_mmap_obj() acquired.*/
index f75c6f0..a2e0033 100644 (file)
@@ -967,7 +967,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
        /* Determine display colour depth for everything except LVDS now,
         * DP requires this before mode_valid() is called.
         */
-       if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode)
+       if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
                nouveau_connector_detect_depth(connector);
 
        /* Find the native mode if this is a digital panel, if we didn't
index 40c8ea4..b8ac66b 100644 (file)
@@ -26,6 +26,8 @@
 #include "head.h"
 #include "ior.h"
 
+#include <drm/display/drm_dp.h>
+
 #include <subdev/bios.h>
 #include <subdev/bios/init.h>
 #include <subdev/gpio.h>
@@ -634,6 +636,50 @@ nvkm_dp_enable_supported_link_rates(struct nvkm_outp *outp)
        return outp->dp.rates != 0;
 }
 
+/* XXX: This is a big fat hack, and this is just drm_dp_read_dpcd_caps()
+ * converted to work inside nvkm. This is a temporary holdover until we start
+ * passing the drm_dp_aux device through NVKM
+ */
+static int
+nvkm_dp_read_dpcd_caps(struct nvkm_outp *outp)
+{
+       struct nvkm_i2c_aux *aux = outp->dp.aux;
+       u8 dpcd_ext[DP_RECEIVER_CAP_SIZE];
+       int ret;
+
+       ret = nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, DP_RECEIVER_CAP_SIZE);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * Prior to DP1.3 the bit represented by
+        * DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT was reserved.
+        * If it is set DP_DPCD_REV at 0000h could be at a value less than
+        * the true capability of the panel. The only way to check is to
+        * then compare 0000h and 2200h.
+        */
+       if (!(outp->dp.dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+             DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT))
+               return 0;
+
+       ret = nvkm_rdaux(aux, DP_DP13_DPCD_REV, dpcd_ext, sizeof(dpcd_ext));
+       if (ret < 0)
+               return ret;
+
+       if (outp->dp.dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) {
+               OUTP_DBG(outp, "Extended DPCD rev less than base DPCD rev (%d > %d)\n",
+                        outp->dp.dpcd[DP_DPCD_REV], dpcd_ext[DP_DPCD_REV]);
+               return 0;
+       }
+
+       if (!memcmp(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext)))
+               return 0;
+
+       memcpy(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext));
+
+       return 0;
+}
+
 void
 nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
 {
@@ -689,7 +735,7 @@ nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
                        memset(outp->dp.lttpr, 0x00, sizeof(outp->dp.lttpr));
                }
 
-               if (!nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, sizeof(outp->dp.dpcd))) {
+               if (!nvkm_dp_read_dpcd_caps(outp)) {
                        const u8 rates[] = { 0x1e, 0x14, 0x0a, 0x06, 0 };
                        const u8 *rate;
                        int rate_max;
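For context, the drm helper named in the XXX comment does exist in the DP helper library; once nvkm is handed a drm_dp_aux, the local copy above could presumably be replaced by a call along these lines (aux here is a hypothetical struct drm_dp_aux *, sketch only):

    u8 dpcd[DP_RECEIVER_CAP_SIZE];
    int ret;

    /* Reads DPCD 0000h, then the 2200h extended caps when advertised,
     * mirroring the logic open-coded in nvkm_dp_read_dpcd_caps() above.
     */
    ret = drm_dp_read_dpcd_caps(aux, dpcd);
    if (ret < 0)
            return ret;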
index 00dbeda..de161e7 100644 (file)
@@ -117,6 +117,7 @@ void gk104_grctx_generate_r418800(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110_grctx;
 void gk110_grctx_generate_r419eb0(struct gf100_gr *);
+void gk110_grctx_generate_r419f78(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110b_grctx;
 extern const struct gf100_grctx_func gk208_grctx;
index 94233d0..52a234b 100644 (file)
@@ -906,7 +906,9 @@ static void
 gk104_grctx_generate_r419f78(struct gf100_gr *gr)
 {
        struct nvkm_device *device = gr->base.engine.subdev.device;
-       nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
+
+       /* bit 3 set disables loads in fp helper invocations, we need it enabled */
+       nvkm_mask(device, 0x419f78, 0x00000009, 0x00000000);
 }
 
 void
index 4391458..3acdd9e 100644 (file)
@@ -820,6 +820,15 @@ gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
        nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
 }
 
+void
+gk110_grctx_generate_r419f78(struct gf100_gr *gr)
+{
+       struct nvkm_device *device = gr->base.engine.subdev.device;
+
+       /* bit 3 set disables loads in fp helper invocations, we need it enabled */
+       nvkm_mask(device, 0x419f78, 0x00000008, 0x00000000);
+}
+
 const struct gf100_grctx_func
 gk110_grctx = {
        .main  = gf100_grctx_generate_main,
@@ -854,4 +863,5 @@ gk110_grctx = {
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r418800 = gk104_grctx_generate_r418800,
        .r419eb0 = gk110_grctx_generate_r419eb0,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index 7b9a34f..5597e87 100644 (file)
@@ -103,4 +103,5 @@ gk110b_grctx = {
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r418800 = gk104_grctx_generate_r418800,
        .r419eb0 = gk110_grctx_generate_r419eb0,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index c78d07a..6126564 100644 (file)
@@ -568,4 +568,5 @@ gk208_grctx = {
        .dist_skip_table = gf117_grctx_generate_dist_skip_table,
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r418800 = gk104_grctx_generate_r418800,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index beac66e..9906974 100644 (file)
@@ -988,4 +988,5 @@ gm107_grctx = {
        .r406500 = gm107_grctx_generate_r406500,
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r419e00 = gm107_grctx_generate_r419e00,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index 3b6c810..a7775aa 100644 (file)
@@ -206,19 +206,6 @@ tu102_gr_av_to_init_veid(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
        return gk20a_gr_av_to_init_(blob, 64, 0x00100000, ppack);
 }
 
-int
-tu102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
-{
-       int ret;
-
-       ret = gm200_gr_load(gr, ver, fwif);
-       if (ret)
-               return ret;
-
-       return gk20a_gr_load_net(gr, "gr/", "sw_veid_bundle_init", ver, tu102_gr_av_to_init_veid,
-                                &gr->bundle_veid);
-}
-
 static const struct gf100_gr_fwif
 tu102_gr_fwif[] = {
        {  0, gm200_gr_load, &tu102_gr, &gp108_gr_fecs_acr, &gp108_gr_gpccs_acr },
index a530ecc..bf34498 100644 (file)
@@ -833,12 +833,12 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
         * need align with 2 pixel.
         */
        if (fb->format->is_yuv && ((new_plane_state->src.x1 >> 16) % 2)) {
-               DRM_ERROR("Invalid Source: Yuv format not support odd xpos\n");
+               DRM_DEBUG_KMS("Invalid Source: Yuv format not support odd xpos\n");
                return -EINVAL;
        }
 
        if (fb->format->is_yuv && new_plane_state->rotation & DRM_MODE_REFLECT_Y) {
-               DRM_ERROR("Invalid Source: Yuv format does not support this rotation\n");
+               DRM_DEBUG_KMS("Invalid Source: Yuv format does not support this rotation\n");
                return -EINVAL;
        }
 
@@ -846,7 +846,7 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
                struct vop *vop = to_vop(crtc);
 
                if (!vop->data->afbc) {
-                       DRM_ERROR("vop does not support AFBC\n");
+                       DRM_DEBUG_KMS("vop does not support AFBC\n");
                        return -EINVAL;
                }
 
@@ -855,15 +855,16 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
                        return ret;
 
                if (new_plane_state->src.x1 || new_plane_state->src.y1) {
-                       DRM_ERROR("AFBC does not support offset display, xpos=%d, ypos=%d, offset=%d\n",
-                                 new_plane_state->src.x1,
-                                 new_plane_state->src.y1, fb->offsets[0]);
+                       DRM_DEBUG_KMS("AFBC does not support offset display, " \
+                                     "xpos=%d, ypos=%d, offset=%d\n",
+                                     new_plane_state->src.x1, new_plane_state->src.y1,
+                                     fb->offsets[0]);
                        return -EINVAL;
                }
 
                if (new_plane_state->rotation && new_plane_state->rotation != DRM_MODE_ROTATE_0) {
-                       DRM_ERROR("No rotation support in AFBC, rotation=%d\n",
-                                 new_plane_state->rotation);
+                       DRM_DEBUG_KMS("No rotation support in AFBC, rotation=%d\n",
+                                     new_plane_state->rotation);
                        return -EINVAL;
                }
        }
index a997dbc..0238078 100644 (file)
 
 #include <linux/crc16.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 #include <linux/hid.h>
 #include <linux/hwmon.h>
 #include <linux/jiffies.h>
+#include <linux/ktime.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/seq_file.h>
@@ -63,6 +65,8 @@ static const char *const aqc_device_names[] = {
 #define CTRL_REPORT_ID                 0x03
 #define AQUAERO_CTRL_REPORT_ID         0x0b
 
+#define CTRL_REPORT_DELAY              200     /* ms */
+
 /* The HID report that the official software always sends
  * after writing values, currently same for all devices
  */
@@ -527,6 +531,9 @@ struct aqc_data {
        int secondary_ctrl_report_size;
        u8 *secondary_ctrl_report;
 
+       ktime_t last_ctrl_report_op;
+       int ctrl_report_delay;  /* Delay between two ctrl report operations, in ms */
+
        int buffer_size;
        u8 *buffer;
        int checksum_start;
@@ -611,17 +618,35 @@ static int aqc_aquastreamxt_convert_fan_rpm(u16 val)
        return 0;
 }
 
+static void aqc_delay_ctrl_report(struct aqc_data *priv)
+{
+       /*
+        * If previous read or write is too close to this one, delay the current operation
+        * to give the device enough time to process the previous one.
+        */
+       if (priv->ctrl_report_delay) {
+               s64 delta = ktime_ms_delta(ktime_get(), priv->last_ctrl_report_op);
+
+               if (delta < priv->ctrl_report_delay)
+                       msleep(priv->ctrl_report_delay - delta);
+       }
+}
+
 /* Expects the mutex to be locked */
 static int aqc_get_ctrl_data(struct aqc_data *priv)
 {
        int ret;
 
+       aqc_delay_ctrl_report(priv);
+
        memset(priv->buffer, 0x00, priv->buffer_size);
        ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size,
                                 HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
        if (ret < 0)
                ret = -ENODATA;
 
+       priv->last_ctrl_report_op = ktime_get();
+
        return ret;
 }
 
@@ -631,6 +656,8 @@ static int aqc_send_ctrl_data(struct aqc_data *priv)
        int ret;
        u16 checksum;
 
+       aqc_delay_ctrl_report(priv);
+
        /* Checksum is not needed for Aquaero */
        if (priv->kind != aquaero) {
                /* Init and xorout value for CRC-16/USB is 0xffff */
@@ -646,12 +673,16 @@ static int aqc_send_ctrl_data(struct aqc_data *priv)
        ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size,
                                 HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
        if (ret < 0)
-               return ret;
+               goto record_access_and_ret;
 
        /* The official software sends this report after every change, so do it here as well */
        ret = hid_hw_raw_request(priv->hdev, priv->secondary_ctrl_report_id,
                                 priv->secondary_ctrl_report, priv->secondary_ctrl_report_size,
                                 HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+
+record_access_and_ret:
+       priv->last_ctrl_report_op = ktime_get();
+
        return ret;
 }
 
@@ -1524,6 +1555,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
                priv->buffer_size = AQUAERO_CTRL_REPORT_SIZE;
                priv->temp_ctrl_offset = AQUAERO_TEMP_CTRL_OFFSET;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->temp_label = label_temp_sensors;
                priv->virtual_temp_label = label_virtual_temp_sensors;
@@ -1547,6 +1579,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
                priv->temp_ctrl_offset = D5NEXT_TEMP_CTRL_OFFSET;
 
                priv->buffer_size = D5NEXT_CTRL_REPORT_SIZE;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->power_cycle_count_offset = D5NEXT_POWER_CYCLES;
 
@@ -1597,6 +1630,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
                priv->temp_ctrl_offset = OCTO_TEMP_CTRL_OFFSET;
 
                priv->buffer_size = OCTO_CTRL_REPORT_SIZE;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->power_cycle_count_offset = OCTO_POWER_CYCLES;
 
@@ -1624,6 +1658,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
                priv->temp_ctrl_offset = QUADRO_TEMP_CTRL_OFFSET;
 
                priv->buffer_size = QUADRO_CTRL_REPORT_SIZE;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->flow_pulses_ctrl_offset = QUADRO_FLOW_PULSES_CTRL_OFFSET;
                priv->power_cycle_count_offset = QUADRO_POWER_CYCLES;
index fa5070a..7c5f4b1 100644 (file)
 enum chips {pfe1100, pfe3000};
 
 /*
- * Disable status check for pfe3000 devices, because some devices report
- * communication error (invalid command) for VOUT_MODE command (0x20)
- * although correct VOUT_MODE (0x16) is returned: it leads to incorrect
- * exponent in linear mode.
+ * Disable status check because some devices report communication error
+ * (invalid command) for VOUT_MODE command (0x20) although the correct
+ * VOUT_MODE (0x16) is returned: it leads to incorrect exponent in linear
+ * mode.
+ * This affects both pfe3000 and pfe1100.
  */
-static struct pmbus_platform_data pfe3000_plat_data = {
+static struct pmbus_platform_data pfe_plat_data = {
        .flags = PMBUS_SKIP_STATUS_CHECK,
 };
 
@@ -94,16 +95,15 @@ static int pfe_pmbus_probe(struct i2c_client *client)
        int model;
 
        model = (int)i2c_match_id(pfe_device_id, client)->driver_data;
+       client->dev.platform_data = &pfe_plat_data;
 
        /*
         * PFE3000-12-069RA devices may not stay in page 0 during device
         * probe which leads to probe failure (read status word failed).
         * So let's set the device to page 0 at the beginning.
         */
-       if (model == pfe3000) {
-               client->dev.platform_data = &pfe3000_plat_data;
+       if (model == pfe3000)
                i2c_smbus_write_byte_data(client, PMBUS_PAGE, 0);
-       }
 
        return pmbus_do_probe(client, &pfe_driver_info[model]);
 }
index fa09d51..baf3125 100644 (file)
@@ -247,7 +247,7 @@ extern void dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp);
 extern int dsp_cmx_conf(struct dsp *dsp, u32 conf_id);
 extern void dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb);
 extern void dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb);
-extern void dsp_cmx_send(void *arg);
+extern void dsp_cmx_send(struct timer_list *arg);
 extern void dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb);
 extern int dsp_cmx_del_conf_member(struct dsp *dsp);
 extern int dsp_cmx_del_conf(struct dsp_conf *conf);
index 357b875..61cb45c 100644 (file)
@@ -1614,7 +1614,7 @@ static u16        dsp_count; /* last sample count */
 static int     dsp_count_valid; /* if we have last sample count */
 
 void
-dsp_cmx_send(void *arg)
+dsp_cmx_send(struct timer_list *arg)
 {
        struct dsp_conf *conf;
        struct dsp_conf_member *member;
index 3860845..fae95f1 100644 (file)
@@ -1195,7 +1195,7 @@ static int __init dsp_init(void)
        }
 
        /* set sample timer */
-       timer_setup(&dsp_spl_tl, (void *)dsp_cmx_send, 0);
+       timer_setup(&dsp_spl_tl, dsp_cmx_send, 0);
        dsp_spl_tl.expires = jiffies + dsp_tics;
        dsp_spl_jiffies = dsp_spl_tl.expires;
        add_timer(&dsp_spl_tl);
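The signature change above brings dsp_cmx_send() in line with the timer API, which hands callbacks a struct timer_list * and expects no function-pointer casts at timer_setup() time. A minimal sketch of that convention, with made-up names (the mISDN timer happens to be a global, so it does not need the container lookup shown here):

    #include <linux/timer.h>

    struct my_state {
            struct timer_list tl;
            unsigned long ticks;
    };

    static void my_timer_fn(struct timer_list *t)
    {
            struct my_state *s = from_timer(s, t, tl);  /* recover the container */

            s->ticks++;
            mod_timer(&s->tl, jiffies + HZ);            /* re-arm if periodic */
    }

    /* setup: timer_setup(&s->tl, my_timer_fn, 0); */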
index 7f0802a..3418d2d 100644 (file)
@@ -251,8 +251,8 @@ int pkt_session_unset_buffers(struct hfi_session_release_buffer_pkt *pkt,
 
                pkt->extradata_size = 0;
                pkt->shdr.hdr.size =
-                       struct_size((struct hfi_session_set_buffers_pkt *)0,
-                                   buffer_info, bd->num_buffers);
+                       struct_size_t(struct hfi_session_set_buffers_pkt,
+                                     buffer_info, bd->num_buffers);
        }
 
        pkt->response_req = bd->response_required;
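struct_size_t() used above is the type-based variant of struct_size(): it computes the size of a structure with a flexible array member without needing an object pointer, which is what the old (struct hfi_session_set_buffers_pkt *)0 cast was faking. A sketch with a made-up struct:

    #include <linux/overflow.h>

    struct pkt {
            u32 hdr;
            u32 buffer_info[];              /* flexible array member */
    };

    /* Both evaluate to sizeof(struct pkt) + n * sizeof(u32), with overflow
     * checking; the second form needs no instance.
     */
    size_t from_obj  = struct_size(p, buffer_info, n);
    size_t from_type = struct_size_t(struct pkt, buffer_info, n);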
index 2d002c8..d0d6ffc 100644 (file)
@@ -338,13 +338,7 @@ static void moxart_transfer_pio(struct moxart_host *host)
                                return;
                        }
                        for (len = 0; len < remain && len < host->fifo_width;) {
-                               /* SCR data must be read in big endian. */
-                               if (data->mrq->cmd->opcode == SD_APP_SEND_SCR)
-                                       *sgp = ioread32be(host->base +
-                                                         REG_DATA_WINDOW);
-                               else
-                                       *sgp = ioread32(host->base +
-                                                       REG_DATA_WINDOW);
+                               *sgp = ioread32(host->base + REG_DATA_WINDOW);
                                sgp++;
                                len += 4;
                        }
index a202a69..b01ffb4 100644 (file)
@@ -29,9 +29,16 @@ struct f_sdhost_priv {
        bool enable_cmd_dat_delay;
 };
 
+static void *sdhci_f_sdhost_priv(struct sdhci_host *host)
+{
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+       return sdhci_pltfm_priv(pltfm_host);
+}
+
 static void sdhci_f_sdh30_soft_voltage_switch(struct sdhci_host *host)
 {
-       struct f_sdhost_priv *priv = sdhci_priv(host);
+       struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
        u32 ctrl = 0;
 
        usleep_range(2500, 3000);
@@ -64,7 +71,7 @@ static unsigned int sdhci_f_sdh30_get_min_clock(struct sdhci_host *host)
 
 static void sdhci_f_sdh30_reset(struct sdhci_host *host, u8 mask)
 {
-       struct f_sdhost_priv *priv = sdhci_priv(host);
+       struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
        u32 ctl;
 
        if (sdhci_readw(host, SDHCI_CLOCK_CONTROL) == 0)
@@ -95,30 +102,32 @@ static const struct sdhci_ops sdhci_f_sdh30_ops = {
        .set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
+static const struct sdhci_pltfm_data sdhci_f_sdh30_pltfm_data = {
+       .ops = &sdhci_f_sdh30_ops,
+       .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC
+               | SDHCI_QUIRK_INVERTED_WRITE_PROTECT,
+       .quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE
+               |  SDHCI_QUIRK2_TUNING_WORK_AROUND,
+};
+
 static int sdhci_f_sdh30_probe(struct platform_device *pdev)
 {
        struct sdhci_host *host;
        struct device *dev = &pdev->dev;
-       int irq, ctrl = 0, ret = 0;
+       int ctrl = 0, ret = 0;
        struct f_sdhost_priv *priv;
+       struct sdhci_pltfm_host *pltfm_host;
        u32 reg = 0;
 
-       irq = platform_get_irq(pdev, 0);
-       if (irq < 0)
-               return irq;
-
-       host = sdhci_alloc_host(dev, sizeof(struct f_sdhost_priv));
+       host = sdhci_pltfm_init(pdev, &sdhci_f_sdh30_pltfm_data,
+                               sizeof(struct f_sdhost_priv));
        if (IS_ERR(host))
                return PTR_ERR(host);
 
-       priv = sdhci_priv(host);
+       pltfm_host = sdhci_priv(host);
+       priv = sdhci_pltfm_priv(pltfm_host);
        priv->dev = dev;
 
-       host->quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
-                      SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
-       host->quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE |
-                       SDHCI_QUIRK2_TUNING_WORK_AROUND;
-
        priv->enable_cmd_dat_delay = device_property_read_bool(dev,
                                                "fujitsu,cmd-dat-delay-select");
 
@@ -126,18 +135,6 @@ static int sdhci_f_sdh30_probe(struct platform_device *pdev)
        if (ret)
                goto err;
 
-       platform_set_drvdata(pdev, host);
-
-       host->hw_name = "f_sdh30";
-       host->ops = &sdhci_f_sdh30_ops;
-       host->irq = irq;
-
-       host->ioaddr = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(host->ioaddr)) {
-               ret = PTR_ERR(host->ioaddr);
-               goto err;
-       }
-
        if (dev_of_node(dev)) {
                sdhci_get_of_property(pdev);
 
@@ -204,24 +201,21 @@ err_rst:
 err_clk:
        clk_disable_unprepare(priv->clk_iface);
 err:
-       sdhci_free_host(host);
+       sdhci_pltfm_free(pdev);
+
        return ret;
 }
 
 static int sdhci_f_sdh30_remove(struct platform_device *pdev)
 {
        struct sdhci_host *host = platform_get_drvdata(pdev);
-       struct f_sdhost_priv *priv = sdhci_priv(host);
-
-       sdhci_remove_host(host, readl(host->ioaddr + SDHCI_INT_STATUS) ==
-                         0xffffffff);
+       struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
 
        reset_control_assert(priv->rst);
        clk_disable_unprepare(priv->clk);
        clk_disable_unprepare(priv->clk_iface);
 
-       sdhci_free_host(host);
-       platform_set_drvdata(pdev, NULL);
+       sdhci_pltfm_unregister(pdev);
 
        return 0;
 }
index 484c9e3..447b06e 100644 (file)
@@ -5901,7 +5901,9 @@ void bond_setup(struct net_device *bond_dev)
 
        bond_dev->hw_features = BOND_VLAN_FEATURES |
                                NETIF_F_HW_VLAN_CTAG_RX |
-                               NETIF_F_HW_VLAN_CTAG_FILTER;
+                               NETIF_F_HW_VLAN_CTAG_FILTER |
+                               NETIF_F_HW_VLAN_STAG_RX |
+                               NETIF_F_HW_VLAN_STAG_FILTER;
 
        bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
        bond_dev->features |= bond_dev->hw_features;
index 8da46d2..bef879c 100644 (file)
@@ -1625,8 +1625,10 @@ static void felix_teardown(struct dsa_switch *ds)
        struct felix *felix = ocelot_to_felix(ocelot);
        struct dsa_port *dp;
 
+       rtnl_lock();
        if (felix->tag_proto_ops)
                felix->tag_proto_ops->teardown(ds);
+       rtnl_unlock();
 
        dsa_switch_for_each_available_port(dp, ds)
                ocelot_deinit_port(ocelot, dp->index);
index 1416262..e0a4cb7 100644 (file)
@@ -1186,14 +1186,9 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
 
 static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
 {
-       struct device_node *node = pdev->dev.of_node;
        struct platform_device *ierb_pdev;
        struct device_node *ierb_node;
 
-       /* Don't register with the IERB if the PF itself is disabled */
-       if (!node || !of_device_is_available(node))
-               return 0;
-
        ierb_node = of_find_compatible_node(NULL, NULL,
                                            "fsl,ls1028a-enetc-ierb");
        if (!ierb_node || !of_device_is_available(ierb_node))
@@ -1208,56 +1203,81 @@ static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
        return enetc_ierb_register_pf(ierb_pdev, pdev);
 }
 
-static int enetc_pf_probe(struct pci_dev *pdev,
-                         const struct pci_device_id *ent)
+static struct enetc_si *enetc_psi_create(struct pci_dev *pdev)
 {
-       struct device_node *node = pdev->dev.of_node;
-       struct enetc_ndev_priv *priv;
-       struct net_device *ndev;
        struct enetc_si *si;
-       struct enetc_pf *pf;
        int err;
 
-       err = enetc_pf_register_with_ierb(pdev);
-       if (err == -EPROBE_DEFER)
-               return err;
-       if (err)
-               dev_warn(&pdev->dev,
-                        "Could not register with IERB driver: %pe, please update the device tree\n",
-                        ERR_PTR(err));
-
-       err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
-       if (err)
-               return dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+       err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(struct enetc_pf));
+       if (err) {
+               dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+               goto out;
+       }
 
        si = pci_get_drvdata(pdev);
        if (!si->hw.port || !si->hw.global) {
                err = -ENODEV;
                dev_err(&pdev->dev, "could not map PF space, probing a VF?\n");
-               goto err_map_pf_space;
+               goto out_pci_remove;
        }
 
        err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
                               &si->cbd_ring);
        if (err)
-               goto err_setup_cbdr;
+               goto out_pci_remove;
 
        err = enetc_init_port_rfs_memory(si);
        if (err) {
                dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
-               goto err_init_port_rfs;
+               goto out_teardown_cbdr;
        }
 
        err = enetc_init_port_rss_memory(si);
        if (err) {
                dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
-               goto err_init_port_rss;
+               goto out_teardown_cbdr;
        }
 
-       if (node && !of_device_is_available(node)) {
-               dev_info(&pdev->dev, "device is disabled, skipping\n");
-               err = -ENODEV;
-               goto err_device_disabled;
+       return si;
+
+out_teardown_cbdr:
+       enetc_teardown_cbdr(&si->cbd_ring);
+out_pci_remove:
+       enetc_pci_remove(pdev);
+out:
+       return ERR_PTR(err);
+}
+
+static void enetc_psi_destroy(struct pci_dev *pdev)
+{
+       struct enetc_si *si = pci_get_drvdata(pdev);
+
+       enetc_teardown_cbdr(&si->cbd_ring);
+       enetc_pci_remove(pdev);
+}
+
+static int enetc_pf_probe(struct pci_dev *pdev,
+                         const struct pci_device_id *ent)
+{
+       struct device_node *node = pdev->dev.of_node;
+       struct enetc_ndev_priv *priv;
+       struct net_device *ndev;
+       struct enetc_si *si;
+       struct enetc_pf *pf;
+       int err;
+
+       err = enetc_pf_register_with_ierb(pdev);
+       if (err == -EPROBE_DEFER)
+               return err;
+       if (err)
+               dev_warn(&pdev->dev,
+                        "Could not register with IERB driver: %pe, please update the device tree\n",
+                        ERR_PTR(err));
+
+       si = enetc_psi_create(pdev);
+       if (IS_ERR(si)) {
+               err = PTR_ERR(si);
+               goto err_psi_create;
        }
 
        pf = enetc_si_priv(si);
@@ -1339,15 +1359,9 @@ err_alloc_si_res:
        si->ndev = NULL;
        free_netdev(ndev);
 err_alloc_netdev:
-err_init_port_rss:
-err_init_port_rfs:
-err_device_disabled:
 err_setup_mac_addresses:
-       enetc_teardown_cbdr(&si->cbd_ring);
-err_setup_cbdr:
-err_map_pf_space:
-       enetc_pci_remove(pdev);
-
+       enetc_psi_destroy(pdev);
+err_psi_create:
        return err;
 }
 
@@ -1370,12 +1384,29 @@ static void enetc_pf_remove(struct pci_dev *pdev)
        enetc_free_msix(priv);
 
        enetc_free_si_resources(priv);
-       enetc_teardown_cbdr(&si->cbd_ring);
 
        free_netdev(si->ndev);
 
-       enetc_pci_remove(pdev);
+       enetc_psi_destroy(pdev);
+}
+
+static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
+{
+       struct device_node *node = pdev->dev.of_node;
+       struct enetc_si *si;
+
+       /* Only apply quirk for disabled functions. For the ones
+        * that are enabled, enetc_pf_probe() will apply it.
+        */
+       if (node && of_device_is_available(node))
+               return;
+
+       si = enetc_psi_create(pdev);
+       if (si)
+               enetc_psi_destroy(pdev);
 }
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
+                       enetc_fixup_clear_rss_rfs);
 
 static const struct pci_device_id enetc_pf_id_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF) },
index 52546f6..f276b5e 100644 (file)
@@ -464,9 +464,9 @@ static void hns3_dbg_fill_content(char *content, u16 len,
                if (result) {
                        if (item_len < strlen(result[i]))
                                break;
-                       strscpy(pos, result[i], strlen(result[i]));
+                       memcpy(pos, result[i], strlen(result[i]));
                } else {
-                       strscpy(pos, items[i].name, strlen(items[i].name));
+                       memcpy(pos, items[i].name, strlen(items[i].name));
                }
                pos += item_len;
                len -= item_len;
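The strscpy() -> memcpy() swap here (and in the matching hclge_dbg_fill_content() hunk below) is a correctness fix: strscpy() always NUL-terminates within the given size, so passing strlen(src) as the size drops the last character, whereas memcpy() copies the full string into the space-padded, fixed-width column. Illustration only:

    char col[8];

    memset(col, ' ', sizeof(col));              /* pre-padded column            */
    strscpy(col, "abcde", strlen("abcde"));     /* stores "abcd\0": truncated   */
    memcpy(col, "abcde", strlen("abcde"));      /* stores "abcde", padding kept */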
index 9f68900..b7b51e5 100644 (file)
@@ -5854,6 +5854,9 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running)
        if (!if_running)
                return;
 
+       if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+               return;
+
        netif_carrier_off(ndev);
        netif_tx_disable(ndev);
 
@@ -5882,7 +5885,16 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running)
        if (!if_running)
                return;
 
-       hns3_nic_reset_all_ring(priv->ae_handle);
+       if (hns3_nic_resetting(ndev))
+               return;
+
+       if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+               return;
+
+       if (hns3_nic_reset_all_ring(priv->ae_handle))
+               return;
+
+       clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
 
        for (i = 0; i < priv->vector_num; i++)
                hns3_vector_enable(&priv->tqp_vector[i]);
index 409db2e..0fb2eae 100644 (file)
@@ -111,9 +111,9 @@ static void hclge_dbg_fill_content(char *content, u16 len,
                if (result) {
                        if (item_len < strlen(result[i]))
                                break;
-                       strscpy(pos, result[i], strlen(result[i]));
+                       memcpy(pos, result[i], strlen(result[i]));
                } else {
-                       strscpy(pos, items[i].name, strlen(items[i].name));
+                       memcpy(pos, items[i].name, strlen(items[i].name));
                }
                pos += item_len;
                len -= item_len;
index bf675c1..a940e35 100644 (file)
@@ -72,6 +72,8 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev);
 static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
 static void hclge_sync_fd_table(struct hclge_dev *hdev);
 static void hclge_update_fec_stats(struct hclge_dev *hdev);
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+                                     int wait_cnt);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -7558,6 +7560,8 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
 
 static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
 {
+#define HCLGE_LINK_STATUS_WAIT_CNT  3
+
        struct hclge_desc desc;
        struct hclge_config_mac_mode_cmd *req =
                (struct hclge_config_mac_mode_cmd *)desc.data;
@@ -7582,9 +7586,15 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
        req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret)
+       if (ret) {
                dev_err(&hdev->pdev->dev,
                        "mac enable fail, ret =%d.\n", ret);
+               return;
+       }
+
+       if (!enable)
+               hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN,
+                                          HCLGE_LINK_STATUS_WAIT_CNT);
 }
 
 static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
@@ -7647,10 +7657,9 @@ static void hclge_phy_link_status_wait(struct hclge_dev *hdev,
        } while (++i < HCLGE_PHY_LINK_STATUS_NUM);
 }
 
-static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+                                     int wait_cnt)
 {
-#define HCLGE_MAC_LINK_STATUS_NUM  100
-
        int link_status;
        int i = 0;
        int ret;
@@ -7663,13 +7672,15 @@ static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
                        return 0;
 
                msleep(HCLGE_LINK_STATUS_MS);
-       } while (++i < HCLGE_MAC_LINK_STATUS_NUM);
+       } while (++i < wait_cnt);
        return -EBUSY;
 }
 
 static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
                                          bool is_phy)
 {
+#define HCLGE_MAC_LINK_STATUS_NUM  100
+
        int link_ret;
 
        link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
@@ -7677,7 +7688,8 @@ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
        if (is_phy)
                hclge_phy_link_status_wait(hdev, link_ret);
 
-       return hclge_mac_link_status_wait(hdev, link_ret);
+       return hclge_mac_link_status_wait(hdev, link_ret,
+                                         HCLGE_MAC_LINK_STATUS_NUM);
 }
 
 static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
@@ -10915,9 +10927,12 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev)
        u32 rx_pause, tx_pause;
        u8 flowctl;
 
-       if (!phydev->link || !phydev->autoneg)
+       if (!phydev->link)
                return 0;
 
+       if (!phydev->autoneg)
+               return hclge_mac_pause_setup_hw(hdev);
+
        local_advertising = linkmode_adv_to_lcl_adv_t(phydev->advertising);
 
        if (phydev->pause)
index de509e5..c58c312 100644 (file)
@@ -1553,7 +1553,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
        return 0;
 }
 
-static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
 {
        bool tx_en, rx_en;
 
index 45dcfef..53eec6d 100644 (file)
@@ -245,6 +245,7 @@ int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
                           u8 pfc_bitmap);
 int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
 int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev);
 void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
 void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
 int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
index 763d613..df76cda 100644 (file)
@@ -97,6 +97,8 @@ static int pending_scrq(struct ibmvnic_adapter *,
 static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
                                        struct ibmvnic_sub_crq_queue *);
 static int ibmvnic_poll(struct napi_struct *napi, int data);
+static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
+static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
 static void send_query_map(struct ibmvnic_adapter *adapter);
 static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
 static int send_request_unmap(struct ibmvnic_adapter *, u8);
@@ -114,6 +116,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
 static void free_long_term_buff(struct ibmvnic_adapter *adapter,
                                struct ibmvnic_long_term_buff *ltb);
 static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
+static void flush_reset_queue(struct ibmvnic_adapter *adapter);
 
 struct ibmvnic_stat {
        char name[ETH_GSTRING_LEN];
@@ -1505,8 +1508,8 @@ static const char *adapter_state_to_string(enum vnic_state state)
 
 static int ibmvnic_login(struct net_device *netdev)
 {
+       unsigned long flags, timeout = msecs_to_jiffies(20000);
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       unsigned long timeout = msecs_to_jiffies(20000);
        int retry_count = 0;
        int retries = 10;
        bool retry;
@@ -1527,11 +1530,9 @@ static int ibmvnic_login(struct net_device *netdev)
 
                if (!wait_for_completion_timeout(&adapter->init_done,
                                                 timeout)) {
-                       netdev_warn(netdev, "Login timed out, retrying...\n");
-                       retry = true;
-                       adapter->init_done_rc = 0;
-                       retry_count++;
-                       continue;
+                       netdev_warn(netdev, "Login timed out\n");
+                       adapter->login_pending = false;
+                       goto partial_reset;
                }
 
                if (adapter->init_done_rc == ABORTED) {
@@ -1573,10 +1574,69 @@ static int ibmvnic_login(struct net_device *netdev)
                                            "SCRQ irq initialization failed\n");
                                return rc;
                        }
+               /* Default/timeout error handling, reset and start fresh */
                } else if (adapter->init_done_rc) {
                        netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
                                    adapter->init_done_rc);
-                       return -EIO;
+
+partial_reset:
+                       /* adapter login failed, so free any CRQs or sub-CRQs
+                        * and register again before attempting to login again.
+                        * If we don't do this then the VIOS may think that
+                        * we are already logged in and reject any subsequent
+                        * attempts
+                        */
+                       netdev_warn(netdev,
+                                   "Freeing and re-registering CRQs before attempting to login again\n");
+                       retry = true;
+                       adapter->init_done_rc = 0;
+                       release_sub_crqs(adapter, true);
+                       /* Much of this is similar logic as ibmvnic_probe(),
+                        * we are essentially re-initializing communication
+                        * with the server. We really should not run any
+                        * resets/failovers here because this is already a form
+                        * of reset and we do not want parallel resets occurring
+                        */
+                       do {
+                               reinit_init_done(adapter);
+                               /* Clear any failovers we got in the previous
+                                * pass since we are re-initializing the CRQ
+                                */
+                               adapter->failover_pending = false;
+                               release_crq_queue(adapter);
+                               /* If we don't sleep here then we risk an
+                                * unnecessary failover event from the VIOS.
+                                * This is a known VIOS issue caused by a vnic
+                                * device freeing and registering a CRQ too
+                                * quickly.
+                                */
+                               msleep(1500);
+                               /* Avoid any resets, since we are currently
+                                * resetting.
+                                */
+                               spin_lock_irqsave(&adapter->rwi_lock, flags);
+                               flush_reset_queue(adapter);
+                               spin_unlock_irqrestore(&adapter->rwi_lock,
+                                                      flags);
+
+                               rc = init_crq_queue(adapter);
+                               if (rc) {
+                                       netdev_err(netdev, "login recovery: init CRQ failed %d\n",
+                                                  rc);
+                                       return -EIO;
+                               }
+
+                               rc = ibmvnic_reset_init(adapter, false);
+                               if (rc)
+                                       netdev_err(netdev, "login recovery: Reset init failed %d\n",
+                                                  rc);
+                               /* IBMVNIC_CRQ_INIT will return EAGAIN if it
+                                * fails, since ibmvnic_reset_init will free
+                                * irq's in failure, we won't be able to receive
+                                * new CRQs so we need to keep trying. probe()
+                                * handles this similarly.
+                                */
+                       } while (rc == -EAGAIN && retry_count++ < retries);
                }
        } while (retry);
 
@@ -1588,12 +1648,22 @@ static int ibmvnic_login(struct net_device *netdev)
 
 static void release_login_buffer(struct ibmvnic_adapter *adapter)
 {
+       if (!adapter->login_buf)
+               return;
+
+       dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
+                        adapter->login_buf_sz, DMA_TO_DEVICE);
        kfree(adapter->login_buf);
        adapter->login_buf = NULL;
 }
 
 static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
 {
+       if (!adapter->login_rsp_buf)
+               return;
+
+       dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
+                        adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
        kfree(adapter->login_rsp_buf);
        adapter->login_rsp_buf = NULL;
 }
@@ -4830,11 +4900,14 @@ static int send_login(struct ibmvnic_adapter *adapter)
        if (rc) {
                adapter->login_pending = false;
                netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
-               goto buf_rsp_map_failed;
+               goto buf_send_failed;
        }
 
        return 0;
 
+buf_send_failed:
+       dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
+                        DMA_FROM_DEVICE);
 buf_rsp_map_failed:
        kfree(login_rsp_buffer);
        adapter->login_rsp_buf = NULL;
@@ -5396,6 +5469,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        int num_tx_pools;
        int num_rx_pools;
        u64 *size_array;
+       u32 rsp_len;
        int i;
 
        /* CHECK: Test/set of login_pending does not need to be atomic
@@ -5407,11 +5481,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        }
        adapter->login_pending = false;
 
-       dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
-                        DMA_TO_DEVICE);
-       dma_unmap_single(dev, adapter->login_rsp_buf_token,
-                        adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
-
        /* If the number of queues requested can't be allocated by the
         * server, the login response will return with code 1. We will need
         * to resend the login buffer with fewer queues requested.
@@ -5447,6 +5516,23 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
                ibmvnic_reset(adapter, VNIC_RESET_FATAL);
                return -EIO;
        }
+
+       rsp_len = be32_to_cpu(login_rsp->len);
+       if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
+           rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
+           rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
+           rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
+           rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
+               /* This can happen if a login request times out and there are
+                * 2 outstanding login requests sent, the LOGIN_RSP crq
+                * could have been for the older login request. So we are
+                * parsing the newer response buffer which may be incomplete
+                */
+               dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
+               ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+               return -EIO;
+       }
+
        size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
                be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
        /* variable buffer sizes are not supported, so just read the
index 2f47cfa..460ca56 100644 (file)
@@ -1401,14 +1401,15 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
        if (fsp->flow_type & FLOW_MAC_EXT)
                return -EINVAL;
 
+       spin_lock_bh(&adapter->fdir_fltr_lock);
        if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) {
+               spin_unlock_bh(&adapter->fdir_fltr_lock);
                dev_err(&adapter->pdev->dev,
                        "Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n",
                        IAVF_MAX_FDIR_FILTERS);
                return -ENOSPC;
        }
 
-       spin_lock_bh(&adapter->fdir_fltr_lock);
        if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) {
                dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n");
                spin_unlock_bh(&adapter->fdir_fltr_lock);
@@ -1781,7 +1782,9 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
        case ETHTOOL_GRXCLSRLCNT:
                if (!FDIR_FLTR_SUPPORT(adapter))
                        break;
+               spin_lock_bh(&adapter->fdir_fltr_lock);
                cmd->rule_cnt = adapter->fdir_active_fltr;
+               spin_unlock_bh(&adapter->fdir_fltr_lock);
                cmd->data = IAVF_MAX_FDIR_FILTERS;
                ret = 0;
                break;
index 6146203..505e82e 100644 (file)
@@ -722,7 +722,9 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f
 bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
 {
        struct iavf_fdir_fltr *tmp;
+       bool ret = false;
 
+       spin_lock_bh(&adapter->fdir_fltr_lock);
        list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
                if (tmp->flow_type != fltr->flow_type)
                        continue;
@@ -732,11 +734,14 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
                    !memcmp(&tmp->ip_data, &fltr->ip_data,
                            sizeof(fltr->ip_data)) &&
                    !memcmp(&tmp->ext_data, &fltr->ext_data,
-                           sizeof(fltr->ext_data)))
-                       return true;
+                           sizeof(fltr->ext_data))) {
+                       ret = true;
+                       break;
+               }
        }
+       spin_unlock_bh(&adapter->fdir_fltr_lock);
 
-       return false;
+       return ret;
 }
 
 /**
index 9db384f..38901d2 100644 (file)
@@ -195,6 +195,10 @@ struct igc_adapter {
        u32 qbv_config_change_errors;
        bool qbv_transition;
        unsigned int qbv_count;
+       /* Access to oper_gate_closed, admin_gate_closed and qbv_transition
+        * are protected by the qbv_tx_lock.
+        */
+       spinlock_t qbv_tx_lock;
 
        /* OS defined structs */
        struct pci_dev *pdev;
index bdeb367..6f557e8 100644 (file)
@@ -4801,6 +4801,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
        adapter->nfc_rule_count = 0;
 
        spin_lock_init(&adapter->stats64_lock);
+       spin_lock_init(&adapter->qbv_tx_lock);
        /* Assume MSI-X interrupts, will be checked during IRQ allocation */
        adapter->flags |= IGC_FLAG_HAS_MSIX;
 
@@ -6119,15 +6120,15 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
        return igc_tsn_offload_apply(adapter);
 }
 
-static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+static int igc_qbv_clear_schedule(struct igc_adapter *adapter)
 {
+       unsigned long flags;
        int i;
 
        adapter->base_time = 0;
        adapter->cycle_time = NSEC_PER_SEC;
        adapter->taprio_offload_enable = false;
        adapter->qbv_config_change_errors = 0;
-       adapter->qbv_transition = false;
        adapter->qbv_count = 0;
 
        for (i = 0; i < adapter->num_tx_queues; i++) {
@@ -6136,10 +6137,28 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
                ring->start_time = 0;
                ring->end_time = NSEC_PER_SEC;
                ring->max_sdu = 0;
+       }
+
+       spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
+       adapter->qbv_transition = false;
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *ring = adapter->tx_ring[i];
+
                ring->oper_gate_closed = false;
                ring->admin_gate_closed = false;
        }
 
+       spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
+       return 0;
+}
+
+static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+{
+       igc_qbv_clear_schedule(adapter);
+
        return 0;
 }
 
@@ -6150,6 +6169,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
        struct igc_hw *hw = &adapter->hw;
        u32 start_time = 0, end_time = 0;
        struct timespec64 now;
+       unsigned long flags;
        size_t n;
        int i;
 
@@ -6217,6 +6237,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
                start_time += e->interval;
        }
 
+       spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
        /* Check whether a queue gets configured.
         * If not, set the start and end time to be end time.
         */
@@ -6241,6 +6263,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
                }
        }
 
+       spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igc_ring *ring = adapter->tx_ring[i];
                struct net_device *dev = adapter->netdev;
@@ -6619,8 +6643,11 @@ static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
 {
        struct igc_adapter *adapter = container_of(timer, struct igc_adapter,
                                                   hrtimer);
+       unsigned long flags;
        unsigned int i;
 
+       spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
        adapter->qbv_transition = true;
        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igc_ring *tx_ring = adapter->tx_ring[i];
@@ -6633,6 +6660,9 @@ static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
                }
        }
        adapter->qbv_transition = false;
+
+       spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
        return HRTIMER_NORESTART;
 }
 
index a9a1028..de31717 100644 (file)
@@ -166,11 +166,11 @@ prestera_util_neigh2nc_key(struct prestera_switch *sw, struct neighbour *n,
 
 static bool __prestera_fi_is_direct(struct fib_info *fi)
 {
-       struct fib_nh *fib_nh;
+       struct fib_nh_common *fib_nhc;
 
        if (fib_info_num_path(fi) == 1) {
-               fib_nh = fib_info_nh(fi, 0);
-               if (fib_nh->fib_nh_gw_family == AF_UNSPEC)
+               fib_nhc = fib_info_nhc(fi, 0);
+               if (fib_nhc->nhc_gw_family == AF_UNSPEC)
                        return true;
        }
 
@@ -261,7 +261,7 @@ static bool
 __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
                                       struct net_device *dev)
 {
-       struct fib_nh *fib_nh;
+       struct fib_nh_common *fib_nhc;
        struct fib_result res;
        bool reachable;
 
@@ -269,8 +269,8 @@ __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
 
        if (!prestera_util_kern_get_route(&res, tb_id, addr))
                if (prestera_fi_is_direct(res.fi)) {
-                       fib_nh = fib_info_nh(res.fi, 0);
-                       if (dev == fib_nh->fib_nh_dev)
+                       fib_nhc = fib_info_nhc(res.fi, 0);
+                       if (dev == fib_nhc->nhc_dev)
                                reachable = true;
                }
 
@@ -324,7 +324,7 @@ prestera_kern_fib_info_nhc(struct fib_notifier_info *info, int n)
        if (info->family == AF_INET) {
                fen4_info = container_of(info, struct fib_entry_notifier_info,
                                         info);
-               return &fib_info_nh(fen4_info->fi, n)->nh_common;
+               return fib_info_nhc(fen4_info->fi, n);
        } else if (info->family == AF_INET6) {
                fen6_info = container_of(info, struct fib6_entry_notifier_info,
                                         info);
index b012833..e869c65 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
 
 #include "reporter_vnic.h"
+#include "en_stats.h"
 #include "devlink.h"
 
 #define VNIC_ENV_GET64(vnic_env_stats, c) \
@@ -36,55 +37,72 @@ int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
        if (err)
                return err;
 
-       err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues",
-                                       VNIC_ENV_GET64(&vnic, total_error_queues));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow",
-                                       VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun",
-                                       VNIC_ENV_GET64(&vnic, comp_eq_overrun));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun",
-                                       VNIC_ENV_GET64(&vnic, async_eq_overrun));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun",
-                                       VNIC_ENV_GET64(&vnic, cq_overrun));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command",
-                                       VNIC_ENV_GET64(&vnic, invalid_command));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command",
-                                       VNIC_ENV_GET64(&vnic, quota_exceeded_command));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
-                                       VNIC_ENV_GET64(&vnic, nic_receive_steering_discard));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
-                                       VNIC_ENV_GET64(&vnic, generated_pkt_steering_fail));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
-                                       VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
-       if (err)
-               return err;
+       if (MLX5_CAP_GEN(dev, vnic_env_queue_counters)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "total_error_queues",
+                                               VNIC_ENV_GET(&vnic, total_error_queues));
+               if (err)
+                       return err;
+
+               err = devlink_fmsg_u32_pair_put(fmsg, "send_queue_priority_update_flow",
+                                               VNIC_ENV_GET(&vnic,
+                                                            send_queue_priority_update_flow));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, eq_overrun_count)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "comp_eq_overrun",
+                                               VNIC_ENV_GET(&vnic, comp_eq_overrun));
+               if (err)
+                       return err;
+
+               err = devlink_fmsg_u32_pair_put(fmsg, "async_eq_overrun",
+                                               VNIC_ENV_GET(&vnic, async_eq_overrun));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, vnic_env_cq_overrun)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "cq_overrun",
+                                               VNIC_ENV_GET(&vnic, cq_overrun));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, invalid_command_count)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "invalid_command",
+                                               VNIC_ENV_GET(&vnic, invalid_command));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, quota_exceeded_count)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "quota_exceeded_command",
+                                               VNIC_ENV_GET(&vnic, quota_exceeded_command));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, nic_receive_steering_discard)) {
+               err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
+                                               VNIC_ENV_GET64(&vnic,
+                                                              nic_receive_steering_discard));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, vnic_env_cnt_steering_fail)) {
+               err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
+                                               VNIC_ENV_GET64(&vnic,
+                                                              generated_pkt_steering_fail));
+               if (err)
+                       return err;
+
+               err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
+                                               VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
+               if (err)
+                       return err;
+       }
 
        err = devlink_fmsg_obj_nest_end(fmsg);
        if (err)
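
The reworked reporter above only emits a vNIC counter when the firmware advertises the matching capability, and the per-queue counters become u32 pairs. Below is a small standalone sketch of that capability-gated reporting pattern; the capability flags and the plain printf() are stand-ins for the real MLX5_CAP_GEN() checks and devlink fmsg helpers.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical capability bits; the driver queries the real ones from
 * firmware with MLX5_CAP_GEN(). */
#define CAP_QUEUE_COUNTERS (1u << 0)
#define CAP_EQ_OVERRUN     (1u << 1)

struct vnic_env {
        uint32_t total_error_queues;
        uint32_t comp_eq_overrun;
};

/* Emit a counter only when the device claims to implement it, so
 * unsupported counters are never reported as misleading zeros. */
static void report_vnic_env(uint32_t caps, const struct vnic_env *env)
{
        if (caps & CAP_QUEUE_COUNTERS)
                printf("total_error_queues: %u\n", env->total_error_queues);
        if (caps & CAP_EQ_OVERRUN)
                printf("comp_eq_overrun: %u\n", env->comp_eq_overrun);
}

int main(void)
{
        struct vnic_env env = { .total_error_queues = 2, .comp_eq_overrun = 1 };

        /* Only the queue counters are advertised, so only they are shown. */
        report_vnic_env(CAP_QUEUE_COUNTERS, &env);
        return 0;
}
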
index 0c88cf4..1730f6a 100644 (file)
@@ -1461,10 +1461,12 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
                attr = mlx5e_tc_get_encap_attr(flow);
                esw_attr = attr->esw_attr;
 
-               if (flow_flag_test(flow, SLOW))
+               if (flow_flag_test(flow, SLOW)) {
                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
-               else
+               } else {
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+                       mlx5e_tc_unoffload_flow_post_acts(flow);
+               }
 
                mlx5e_tc_detach_mod_hdr(priv, flow, attr);
                attr->modify_hdr = NULL;
index 1c82011..c27df14 100644 (file)
@@ -5266,6 +5266,7 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
 static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
                          struct net_device *netdev)
 {
+       const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED;
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5e_flow_steering *fs;
        int err;
@@ -5294,9 +5295,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
                mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 
        mlx5e_health_create_reporters(priv);
+
+       /* If netdev is already registered (e.g. move from uplink to nic profile),
+        * RTNL lock must be held before triggering netdev notifiers.
+        */
+       if (take_rtnl)
+               rtnl_lock();
+
        /* update XDP supported features */
        mlx5e_set_xdp_feature(netdev);
 
+       if (take_rtnl)
+               rtnl_unlock();
+
        return 0;
 }
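
The mlx5e_nic_init() hunk takes the RTNL lock only when the netdev is already registered, because the feature update then fires netdev notifiers that expect RTNL, while the initial-registration path must not grab it. A minimal sketch of that conditional-locking shape, with a pthread mutex standing in for rtnl_lock()/rtnl_unlock():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t fake_rtnl = PTHREAD_MUTEX_INITIALIZER;

/* Take the lock only when the object is already visible to the rest of
 * the system; during initial setup nobody else can reach it yet. */
static void update_features(bool already_registered)
{
        if (already_registered)
                pthread_mutex_lock(&fake_rtnl);

        printf("updating features%s\n",
               already_registered ? " under the lock" : "");

        if (already_registered)
                pthread_mutex_unlock(&fake_rtnl);
}

int main(void)
{
        update_features(false);  /* first registration, no lock needed */
        update_features(true);   /* profile change on a live device    */
        return 0;
}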
 
index 9237763..31708d5 100644 (file)
@@ -1943,9 +1943,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_flow_attr *attr = flow->attr;
-       struct mlx5_esw_flow_attr *esw_attr;
 
-       esw_attr = attr->esw_attr;
        mlx5e_put_flow_tunnel_id(flow);
 
        remove_unready_flow(flow);
@@ -1966,12 +1964,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 
        mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
 
-       if (esw_attr->int_port)
-               mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
-
-       if (esw_attr->dest_int_port)
-               mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
-
        if (flow_flag_test(flow, L3_TO_L2_DECAP))
                mlx5e_detach_decap(priv, flow);
 
@@ -4268,6 +4260,7 @@ static void
 mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
 {
        struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
+       struct mlx5_esw_flow_attr *esw_attr;
 
        if (!attr)
                return;
@@ -4285,6 +4278,18 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a
                mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
        }
 
+       if (mlx5e_is_eswitch_flow(flow)) {
+               esw_attr = attr->esw_attr;
+
+               if (esw_attr->int_port)
+                       mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+                                             esw_attr->int_port);
+
+               if (esw_attr->dest_int_port)
+                       mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+                                             esw_attr->dest_int_port);
+       }
+
        mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
 
        free_branch_attr(flow, attr->branch_true);
index af779c7..fdf2be5 100644 (file)
@@ -60,7 +60,7 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16
        }  else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) {
                memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
                dl_port->attrs.switch_id.id_len = ppid.id_len;
-               devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum,
+               devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum,
                                              vport_num - 1, false);
        }
        return dl_port;
index d3a3fe4..7d9bbb4 100644 (file)
@@ -574,7 +574,7 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
        for (i = 0; i < ldev->ports; i++) {
                for (j = 0; j < ldev->buckets; j++) {
                        idx = i * ldev->buckets + j;
-                       if (ldev->v2p_map[i] == ports[i])
+                       if (ldev->v2p_map[idx] == ports[idx])
                                continue;
 
                        dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
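
The lag fix compares v2p_map[idx] against ports[idx]: both arrays are flattened port-by-bucket maps indexed as idx = i * buckets + j, so the old comparison by i alone ignored every bucket past the first. A tiny self-contained illustration of the flattened indexing, assuming two ports and two buckets purely for the example:

#include <stdio.h>

#define PORTS   2
#define BUCKETS 2

int main(void)
{
        int v2p_map[PORTS * BUCKETS] = { 1, 1, 2, 2 };
        int ports[PORTS * BUCKETS]   = { 1, 2, 2, 1 };

        for (int i = 0; i < PORTS; i++) {
                for (int j = 0; j < BUCKETS; j++) {
                        int idx = i * BUCKETS + j;

                        /* Comparing ports[i] here would miss the changes
                         * in buckets with j > 0 (idx 1 and 3 above). */
                        if (v2p_map[idx] == ports[idx])
                                continue;
                        printf("port %d bucket %d needs an update\n", i, j);
                }
        }
        return 0;
}
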
index 973babf..377372f 100644 (file)
@@ -227,10 +227,15 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
        clock = container_of(timer, struct mlx5_clock, timer);
        mdev = container_of(clock, struct mlx5_core_dev, clock);
 
+       if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+               goto out;
+
        write_seqlock_irqsave(&clock->lock, flags);
        timecounter_read(&timer->tc);
        mlx5_update_clock_info_page(mdev);
        write_sequnlock_irqrestore(&clock->lock, flags);
+
+out:
        schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
 }
 
index f42abc2..72ae560 100644 (file)
@@ -1989,7 +1989,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
        mlx5_enter_error_state(dev, false);
        mlx5_error_sw_reset(dev);
-       mlx5_unload_one(dev, true);
+       mlx5_unload_one(dev, false);
        mlx5_drain_health_wq(dev);
        mlx5_pci_disable_device(dev);
 
index c4be257..682d3dc 100644 (file)
@@ -361,7 +361,7 @@ static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16
 
 static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func)
 {
-       return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev)
+       return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + 1
                          : vport;
 }
 
index 4e42a3b..a2fc937 100644 (file)
@@ -285,8 +285,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
                host_total_vfs = MLX5_GET(query_esw_functions_out, out,
                                          host_params_context.host_total_vfs);
                kvfree(out);
-               if (host_total_vfs)
-                       return host_total_vfs;
+               return host_total_vfs;
        }
 
 done:
index d6947fe..8ca534e 100644 (file)
@@ -82,7 +82,7 @@ dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr,
        u32 chunk_size;
        u32 index;
 
-       chunk_size = ilog2(num_of_actions);
+       chunk_size = ilog2(roundup_pow_of_two(num_of_actions));
        /* HW modify action index granularity is at least 64B */
        chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
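
ilog2() rounds down, so a non-power-of-two num_of_actions could previously land in a chunk one order too small; rounding up to the next power of two first guarantees the chunk fits. A quick userspace check of the arithmetic, with local helpers standing in for the kernel's ilog2() and roundup_pow_of_two():

#include <stdio.h>

/* Floor log2 for nonzero values, like the kernel's ilog2(). */
static unsigned int ilog2_u32(unsigned int v)
{
        unsigned int r = 0;

        while (v >>= 1)
                r++;
        return r;
}

static unsigned int roundup_pow_of_two_u32(unsigned int v)
{
        unsigned int p = 1;

        while (p < v)
                p <<= 1;
        return p;
}

int main(void)
{
        unsigned int num_of_actions = 5;

        /* 5 actions: ilog2(5) = 2, a chunk of 4, too small.
         * ilog2(roundup_pow_of_two(5)) = 3, a chunk of 8, fits. */
        printf("old order %u, fixed order %u\n",
               ilog2_u32(num_of_actions),
               ilog2_u32(roundup_pow_of_two_u32(num_of_actions)));
        return 0;
}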
 
index a499e46..c2ad092 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/ethtool.h>
 #include <linux/filter.h>
 #include <linux/mm.h>
+#include <linux/pci.h>
 
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
@@ -2345,9 +2346,12 @@ int mana_attach(struct net_device *ndev)
 static int mana_dealloc_queues(struct net_device *ndev)
 {
        struct mana_port_context *apc = netdev_priv(ndev);
+       unsigned long timeout = jiffies + 120 * HZ;
        struct gdma_dev *gd = apc->ac->gdma_dev;
        struct mana_txq *txq;
+       struct sk_buff *skb;
        int i, err;
+       u32 tsleep;
 
        if (apc->port_is_up)
                return -EINVAL;
@@ -2363,15 +2367,40 @@ static int mana_dealloc_queues(struct net_device *ndev)
         * to false, but it doesn't matter since mana_start_xmit() drops any
         * new packets due to apc->port_is_up being false.
         *
-        * Drain all the in-flight TX packets
+        * Drain all the in-flight TX packets.
+        * A single 120 second timeout covers all the queues; it breaks
+        * the loop if the hardware stops responding, and was chosen
+        * with the maximum number of queues in mind.
         */
+
        for (i = 0; i < apc->num_queues; i++) {
                txq = &apc->tx_qp[i].txq;
-
-               while (atomic_read(&txq->pending_sends) > 0)
-                       usleep_range(1000, 2000);
+               tsleep = 1000;
+               while (atomic_read(&txq->pending_sends) > 0 &&
+                      time_before(jiffies, timeout)) {
+                       usleep_range(tsleep, tsleep + 1000);
+                       tsleep <<= 1;
+               }
+               if (atomic_read(&txq->pending_sends)) {
+                       err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
+                       if (err) {
+                               netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
+                                          err, atomic_read(&txq->pending_sends),
+                                          txq->gdma_txq_id);
+                       }
+                       break;
+               }
        }
 
+       for (i = 0; i < apc->num_queues; i++) {
+               txq = &apc->tx_qp[i].txq;
+               while ((skb = skb_dequeue(&txq->pending_skbs))) {
+                       mana_unmap_skb(skb, apc);
+                       dev_kfree_skb_any(skb);
+               }
+               atomic_set(&txq->pending_sends, 0);
+       }
        /* We're 100% sure the queues can no longer be woken up, because
         * we're sure now mana_poll_tx_cq() can't be running.
         */
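
The drain loop now backs off exponentially (1 ms, 2 ms, 4 ms, ...) under an overall deadline instead of polling every millisecond forever, and the caller falls back to an FLR plus explicit skb cleanup if the deadline expires. A minimal sketch of the backoff-with-deadline pattern, with a stub pending counter in place of the per-queue atomics:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Stub: the driver reads atomic_read(&txq->pending_sends) here. */
static int pending_sends(void)
{
        return 0;
}

static bool drain_with_backoff(unsigned int deadline_sec)
{
        time_t deadline = time(NULL) + deadline_sec;
        unsigned int sleep_us = 1000;

        while (pending_sends() > 0 && time(NULL) < deadline) {
                usleep(sleep_us);
                sleep_us <<= 1;         /* double the wait each round */
        }
        return pending_sends() == 0;    /* false: caller must recover */
}

int main(void)
{
        printf("drained: %d\n", drain_with_backoff(120));
        return 0;
}
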
index 612b001..432fb93 100644 (file)
@@ -1817,6 +1817,7 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
 static void ionic_tx_timeout_work(struct work_struct *ws)
 {
        struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work);
+       int err;
 
        if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                return;
@@ -1829,8 +1830,11 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
 
        mutex_lock(&lif->queue_lock);
        ionic_stop_queues_reconfig(lif);
-       ionic_start_queues_reconfig(lif);
+       err = ionic_start_queues_reconfig(lif);
        mutex_unlock(&lif->queue_lock);
+
+       if (err)
+               dev_err(lif->ionic->dev, "%s: Restarting queues failed\n", __func__);
 }
 
 static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
@@ -2800,17 +2804,22 @@ static int ionic_cmb_reconfig(struct ionic_lif *lif,
                        if (err) {
                                dev_err(lif->ionic->dev,
                                        "CMB restore failed: %d\n", err);
-                               goto errout;
+                               goto err_out;
                        }
                }
 
-               ionic_start_queues_reconfig(lif);
-       } else {
-               /* This was detached in ionic_stop_queues_reconfig() */
-               netif_device_attach(lif->netdev);
+               err = ionic_start_queues_reconfig(lif);
+               if (err) {
+                       dev_err(lif->ionic->dev,
+                               "CMB reconfig failed: %d\n", err);
+                       goto err_out;
+               }
        }
 
-errout:
+err_out:
+       /* This was detached in ionic_stop_queues_reconfig() */
+       netif_device_attach(lif->netdev);
+
        return err;
 }
 
index 984dfa5..144ec75 100644 (file)
@@ -743,7 +743,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
                u64_stats_update_begin(&rxsc_stats->syncp);
                rxsc_stats->stats.InPktsLate++;
                u64_stats_update_end(&rxsc_stats->syncp);
-               secy->netdev->stats.rx_dropped++;
+               DEV_STATS_INC(secy->netdev, rx_dropped);
                return false;
        }
 
@@ -767,7 +767,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
                        rxsc_stats->stats.InPktsNotValid++;
                        u64_stats_update_end(&rxsc_stats->syncp);
                        this_cpu_inc(rx_sa->stats->InPktsNotValid);
-                       secy->netdev->stats.rx_errors++;
+                       DEV_STATS_INC(secy->netdev, rx_errors);
                        return false;
                }
 
@@ -1069,7 +1069,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
                        u64_stats_update_begin(&secy_stats->syncp);
                        secy_stats->stats.InPktsNoTag++;
                        u64_stats_update_end(&secy_stats->syncp);
-                       macsec->secy.netdev->stats.rx_dropped++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
                        continue;
                }
 
@@ -1179,7 +1179,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
                u64_stats_update_begin(&secy_stats->syncp);
                secy_stats->stats.InPktsBadTag++;
                u64_stats_update_end(&secy_stats->syncp);
-               secy->netdev->stats.rx_errors++;
+               DEV_STATS_INC(secy->netdev, rx_errors);
                goto drop_nosa;
        }
 
@@ -1196,7 +1196,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
                        u64_stats_update_begin(&rxsc_stats->syncp);
                        rxsc_stats->stats.InPktsNotUsingSA++;
                        u64_stats_update_end(&rxsc_stats->syncp);
-                       secy->netdev->stats.rx_errors++;
+                       DEV_STATS_INC(secy->netdev, rx_errors);
                        if (active_rx_sa)
                                this_cpu_inc(active_rx_sa->stats->InPktsNotUsingSA);
                        goto drop_nosa;
@@ -1230,7 +1230,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
                        u64_stats_update_begin(&rxsc_stats->syncp);
                        rxsc_stats->stats.InPktsLate++;
                        u64_stats_update_end(&rxsc_stats->syncp);
-                       macsec->secy.netdev->stats.rx_dropped++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
                        goto drop;
                }
        }
@@ -1271,7 +1271,7 @@ deliver:
        if (ret == NET_RX_SUCCESS)
                count_rx(dev, len);
        else
-               macsec->secy.netdev->stats.rx_dropped++;
+               DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
 
        rcu_read_unlock();
 
@@ -1308,7 +1308,7 @@ nosci:
                        u64_stats_update_begin(&secy_stats->syncp);
                        secy_stats->stats.InPktsNoSCI++;
                        u64_stats_update_end(&secy_stats->syncp);
-                       macsec->secy.netdev->stats.rx_errors++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_errors);
                        continue;
                }
 
@@ -1327,7 +1327,7 @@ nosci:
                        secy_stats->stats.InPktsUnknownSCI++;
                        u64_stats_update_end(&secy_stats->syncp);
                } else {
-                       macsec->secy.netdev->stats.rx_dropped++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
                }
        }
 
@@ -3422,7 +3422,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 
        if (!secy->operational) {
                kfree_skb(skb);
-               dev->stats.tx_dropped++;
+               DEV_STATS_INC(dev, tx_dropped);
                return NETDEV_TX_OK;
        }
 
@@ -3430,7 +3430,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
        skb = macsec_encrypt(skb, dev);
        if (IS_ERR(skb)) {
                if (PTR_ERR(skb) != -EINPROGRESS)
-                       dev->stats.tx_dropped++;
+                       DEV_STATS_INC(dev, tx_dropped);
                return NETDEV_TX_OK;
        }
 
@@ -3667,9 +3667,9 @@ static void macsec_get_stats64(struct net_device *dev,
 
        dev_fetch_sw_netstats(s, dev->tstats);
 
-       s->rx_dropped = dev->stats.rx_dropped;
-       s->tx_dropped = dev->stats.tx_dropped;
-       s->rx_errors = dev->stats.rx_errors;
+       s->rx_dropped = atomic_long_read(&dev->stats.__rx_dropped);
+       s->tx_dropped = atomic_long_read(&dev->stats.__tx_dropped);
+       s->rx_errors = atomic_long_read(&dev->stats.__rx_errors);
 }
 
 static int macsec_get_iflink(const struct net_device *dev)
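
The macsec conversion replaces plain dev->stats increments, which can lose counts when several CPUs drop packets concurrently, with DEV_STATS_INC(), and the read side switches to atomic_long_read() of the per-device fields. A compact userspace analogue using C11 atomics to show the shape of the atomic counter:

#include <stdatomic.h>
#include <stdio.h>

struct dev_stats {
        atomic_long rx_dropped;  /* analogue of the per-device core stats */
};

/* Analogue of DEV_STATS_INC(dev, rx_dropped): safe from any context
 * without extra locking, unlike a plain ++ on a long. */
static void stats_inc_rx_dropped(struct dev_stats *s)
{
        atomic_fetch_add_explicit(&s->rx_dropped, 1, memory_order_relaxed);
}

int main(void)
{
        struct dev_stats s = { .rx_dropped = 0 };

        stats_inc_rx_dropped(&s);
        printf("rx_dropped = %ld\n",
               atomic_load_explicit(&s.rx_dropped, memory_order_relaxed));
        return 0;
}
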
index c1f307d..8a77ec3 100644 (file)
@@ -459,21 +459,27 @@ static int at803x_set_wol(struct phy_device *phydev,
                        phy_write_mmd(phydev, MDIO_MMD_PCS, offsets[i],
                                      mac[(i * 2) + 1] | (mac[(i * 2)] << 8));
 
-               /* Enable WOL function */
-               ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
-                               0, AT803X_WOL_EN);
-               if (ret)
-                       return ret;
+               /* Enable WOL function for 1588 */
+               if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+                       ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+                                            AT803X_PHY_MMD3_WOL_CTRL,
+                                            0, AT803X_WOL_EN);
+                       if (ret)
+                               return ret;
+               }
                /* Enable WOL interrupt */
                ret = phy_modify(phydev, AT803X_INTR_ENABLE, 0, AT803X_INTR_ENABLE_WOL);
                if (ret)
                        return ret;
        } else {
-               /* Disable WoL function */
-               ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
-                               AT803X_WOL_EN, 0);
-               if (ret)
-                       return ret;
+               /* Disable WoL function for 1588 */
+               if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+                       ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+                                            AT803X_PHY_MMD3_WOL_CTRL,
+                                            AT803X_WOL_EN, 0);
+                       if (ret)
+                               return ret;
+               }
                /* Disable WOL interrupt */
                ret = phy_modify(phydev, AT803X_INTR_ENABLE, AT803X_INTR_ENABLE_WOL, 0);
                if (ret)
@@ -508,11 +514,11 @@ static void at803x_get_wol(struct phy_device *phydev,
        wol->supported = WAKE_MAGIC;
        wol->wolopts = 0;
 
-       value = phy_read_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL);
+       value = phy_read(phydev, AT803X_INTR_ENABLE);
        if (value < 0)
                return;
 
-       if (value & AT803X_WOL_EN)
+       if (value & AT803X_INTR_ENABLE_WOL)
                wol->wolopts |= WAKE_MAGIC;
 }
 
@@ -858,9 +864,6 @@ static int at803x_probe(struct phy_device *phydev)
        if (phydev->drv->phy_id == ATH8031_PHY_ID) {
                int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
                int mode_cfg;
-               struct ethtool_wolinfo wol = {
-                       .wolopts = 0,
-               };
 
                if (ccr < 0)
                        return ccr;
@@ -877,12 +880,14 @@ static int at803x_probe(struct phy_device *phydev)
                        break;
                }
 
-               /* Disable WOL by default */
-               ret = at803x_set_wol(phydev, &wol);
-               if (ret < 0) {
-                       phydev_err(phydev, "failed to disable WOL on probe: %d\n", ret);
+               /* Disable WoL in 1588 register which is enabled
+                * by default
+                */
+               ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+                                    AT803X_PHY_MMD3_WOL_CTRL,
+                                    AT803X_WOL_EN, 0);
+               if (ret)
                        return ret;
-               }
        }
 
        return 0;
@@ -2059,8 +2064,6 @@ static struct phy_driver at803x_driver[] = {
        .flags                  = PHY_POLL_CABLE_TEST,
        .config_init            = at803x_config_init,
        .link_change_notify     = at803x_link_change_notify,
-       .set_wol                = at803x_set_wol,
-       .get_wol                = at803x_get_wol,
        .suspend                = at803x_suspend,
        .resume                 = at803x_resume,
        /* PHY_BASIC_FEATURES */
index 25f0191..100339b 100644 (file)
@@ -1594,7 +1594,7 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
        if (zerocopy)
                return false;
 
-       if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+       if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) +
            SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
                return false;
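
The tun check now also budgets for XDP_PACKET_HEADROOM: a small packet built on this path must fit the XDP headroom, the padded payload and the skb_shared_info tail in a single page, otherwise the XDP program could run past the buffer. A quick arithmetic sketch of that bound; all constants here are illustrative stand-ins, the real values come from the kernel headers:

#include <stdio.h>

#define PAGE_SIZE_EX           4096UL
#define XDP_PACKET_HEADROOM_EX  256UL  /* illustrative */
#define TUN_RX_PAD_EX            96UL  /* illustrative */
#define SHARED_INFO_EX          320UL  /* rough sizeof(struct skb_shared_info) */

/* Cache-line rounding, standing in for SKB_DATA_ALIGN(). */
static unsigned long align_up(unsigned long v)
{
        return (v + 63) & ~63UL;
}

int main(void)
{
        unsigned long len = 3000;
        unsigned long need;

        need = align_up(len + TUN_RX_PAD_EX + XDP_PACKET_HEADROOM_EX) +
               align_up(SHARED_INFO_EX);

        printf("need %lu of %lu bytes: %s\n", need, PAGE_SIZE_EX,
               need > PAGE_SIZE_EX ? "fall back to a regular skb"
                                   : "can build in one page");
        return 0;
}
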
 
index a3de081..c3ff30a 100644 (file)
@@ -713,6 +713,12 @@ static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
        return vninode;
 }
 
+static void vxlan_vni_free(struct vxlan_vni_node *vninode)
+{
+       free_percpu(vninode->stats);
+       kfree(vninode);
+}
+
 static int vxlan_vni_add(struct vxlan_dev *vxlan,
                         struct vxlan_vni_group *vg,
                         u32 vni, union vxlan_addr *group,
@@ -740,7 +746,7 @@ static int vxlan_vni_add(struct vxlan_dev *vxlan,
                                            &vninode->vnode,
                                            vxlan_vni_rht_params);
        if (err) {
-               kfree(vninode);
+               vxlan_vni_free(vninode);
                return err;
        }
 
@@ -763,8 +769,7 @@ static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
        struct vxlan_vni_node *v;
 
        v = container_of(rcu, struct vxlan_vni_node, rcu);
-       free_percpu(v->stats);
-       kfree(v);
+       vxlan_vni_free(v);
 }
 
 static int vxlan_vni_del(struct vxlan_dev *vxlan,
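
vxlan_vni_free() gives the error path in vxlan_vni_add() and the RCU callback one common teardown, where previously the error path freed the node alone and leaked the per-cpu stats. A small sketch of the shared-free-helper pattern under that reading, with malloc()/free() standing in for the kernel allocators:

#include <stdlib.h>

struct vni_node {
        long *stats;    /* stands in for the per-cpu stats allocation */
};

/* One teardown helper used by every path, so none of them can forget
 * the embedded allocation. */
static void vni_free(struct vni_node *n)
{
        free(n->stats);
        free(n);
}

static struct vni_node *vni_add(int fail_insert)
{
        struct vni_node *n = calloc(1, sizeof(*n));

        if (!n)
                return NULL;
        n->stats = calloc(16, sizeof(*n->stats));
        if (!n->stats || fail_insert) {
                vni_free(n);            /* not a bare free(n) */
                return NULL;
        }
        return n;
}

int main(void)
{
        struct vni_node *n = vni_add(0);

        if (n)
                vni_free(n);
        return 0;
}
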
index 5bf7822..0ba714c 100644 (file)
@@ -6,7 +6,7 @@
 #include "allowedips.h"
 #include "peer.h"
 
-enum { MAX_ALLOWEDIPS_BITS = 128 };
+enum { MAX_ALLOWEDIPS_DEPTH = 129 };
 
 static struct kmem_cache *node_cache;
 
@@ -42,7 +42,7 @@ static void push_rcu(struct allowedips_node **stack,
                     struct allowedips_node __rcu *p, unsigned int *len)
 {
        if (rcu_access_pointer(p)) {
-               if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_BITS))
+               if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_DEPTH))
                        return;
                stack[(*len)++] = rcu_dereference_raw(p);
        }
@@ -55,7 +55,7 @@ static void node_free_rcu(struct rcu_head *rcu)
 
 static void root_free_rcu(struct rcu_head *rcu)
 {
-       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = {
+       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = {
                container_of(rcu, struct allowedips_node, rcu) };
        unsigned int len = 1;
 
@@ -68,7 +68,7 @@ static void root_free_rcu(struct rcu_head *rcu)
 
 static void root_remove_peer_lists(struct allowedips_node *root)
 {
-       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = { root };
+       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = { root };
        unsigned int len = 1;
 
        while (len > 0 && (node = stack[--len])) {
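
Renaming MAX_ALLOWEDIPS_BITS (128) to MAX_ALLOWEDIPS_DEPTH (129) matches what the stack actually has to hold: walking a binary trie keyed on 128-bit addresses can push the root plus one node per bit, so a 128-entry stack can be one short. A toy walk showing where the extra slot comes from:

#include <stdio.h>

#define ADDR_BITS 128
#define MAX_DEPTH (ADDR_BITS + 1)  /* the root plus one node per bit */

int main(void)
{
        int depth = 1;  /* the root itself */

        /* Worst case: every one of the 128 bits adds another node on
         * the path being walked, so the stack holds 129 entries. */
        for (int bit = 0; bit < ADDR_BITS; bit++)
                depth++;

        printf("worst-case stack entries: %d (limit %d)\n", depth, MAX_DEPTH);
        return 0;
}
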
index 78ebe28..3d1f64f 100644 (file)
@@ -593,16 +593,20 @@ bool __init wg_allowedips_selftest(void)
        wg_allowedips_remove_by_peer(&t, a, &mutex);
        test_negative(4, a, 192, 168, 0, 1);
 
-       /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_BITS) in free_node
+       /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_DEPTH) in free_node
         * if something goes wrong.
         */
-       for (i = 0; i < MAX_ALLOWEDIPS_BITS; ++i) {
-               part = cpu_to_be64(~(1LLU << (i % 64)));
-               memset(&ip, 0xff, 16);
-               memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
+       for (i = 0; i < 64; ++i) {
+               part = cpu_to_be64(~0LLU << i);
+               memset(&ip, 0xff, 8);
+               memcpy((u8 *)&ip + 8, &part, 8);
+               wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+               memcpy(&ip, &part, 8);
+               memset((u8 *)&ip + 8, 0, 8);
                wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
        }
-
+       memset(&ip, 0, 16);
+       wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
        wg_allowedips_free(&t, &mutex);
 
        wg_allowedips_init(&t);
index 6512267..4928e4e 100644 (file)
@@ -2144,8 +2144,7 @@ int ath12k_wmi_send_scan_start_cmd(struct ath12k *ar,
        struct wmi_tlv *tlv;
        void *ptr;
        int i, ret, len;
-       u32 *tmp_ptr;
-       u8 extraie_len_with_pad = 0;
+       u32 *tmp_ptr, extraie_len_with_pad = 0;
        struct ath12k_wmi_hint_short_ssid_arg *s_ssid = NULL;
        struct ath12k_wmi_hint_bssid_arg *hint_bssid = NULL;
 
index de8a2e2..2a90bb2 100644 (file)
@@ -1456,6 +1456,10 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
                params_size -= BRCMF_SCAN_PARAMS_V2_FIXED_SIZE;
                params_size += BRCMF_SCAN_PARAMS_FIXED_SIZE;
                params_v1 = kzalloc(params_size, GFP_KERNEL);
+               if (!params_v1) {
+                       err = -ENOMEM;
+                       goto exit_params;
+               }
                params_v1->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION);
                brcmf_scan_params_v2_to_v1(&params->params_v2_le, &params_v1->params_le);
                kfree(params);
@@ -1473,6 +1477,7 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
                        bphy_err(drvr, "error (%d)\n", err);
        }
 
+exit_params:
        kfree(params);
 exit:
        return err;
index b114bab..c93e625 100644 (file)
@@ -2524,7 +2524,7 @@ static int cmac_dma_init(struct rtw89_dev *rtwdev, u8 mac_idx)
        u32 reg;
        int ret;
 
-       if (chip_id != RTL8852A && chip_id != RTL8852B)
+       if (chip_id != RTL8852B)
                return 0;
 
        ret = rtw89_mac_check_mac_en(rtwdev, mac_idx, RTW89_CMAC_SEL);
index c8d20cd..88f760a 100644 (file)
@@ -396,7 +396,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
        struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
        struct xen_netif_tx_request *txp = first;
 
-       nr_slots = shinfo->nr_frags + 1;
+       nr_slots = shinfo->nr_frags + frag_overflow + 1;
 
        copy_count(skb) = 0;
        XENVIF_TX_CB(skb)->split_mask = 0;
@@ -462,8 +462,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
                }
        }
 
-       for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
-            shinfo->nr_frags++, gop++) {
+       for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
+            shinfo->nr_frags++, gop++, nr_slots--) {
                index = pending_index(queue->pending_cons++);
                pending_idx = queue->pending_ring[index];
                xenvif_tx_create_map_op(queue, pending_idx, txp,
@@ -476,12 +476,12 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
                        txp++;
        }
 
-       if (frag_overflow) {
+       if (nr_slots > 0) {
 
                shinfo = skb_shinfo(nskb);
                frags = shinfo->frags;
 
-               for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
+               for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
                     shinfo->nr_frags++, txp++, gop++) {
                        index = pending_index(queue->pending_cons++);
                        pending_idx = queue->pending_ring[index];
@@ -492,6 +492,11 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
                }
 
                skb_shinfo(skb)->frag_list = nskb;
+       } else if (nskb) {
+               /* A frag_list skb was allocated but it is no longer needed
+                * because enough slots were converted to copy ops above.
+                */
+               kfree_skb(nskb);
        }
 
        (*copy_ops) = cop - queue->tx_copy_ops;
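
With the fix, nr_slots counts every remaining slot including the overflow ones: the first loop fills the main skb up to MAX_SKB_FRAGS while consuming nr_slots, anything left spills into the preallocated frag_list skb, and if nothing is left that spare skb is freed rather than attached empty. A condensed model of splitting N items between a fixed-size primary array and an optional overflow buffer:

#include <stdio.h>
#include <stdlib.h>

#define MAX_PRIMARY 4   /* stands in for MAX_SKB_FRAGS */

int main(void)
{
        int nr_slots = 3;                         /* try 6 to see overflow used */
        int *overflow = malloc(8 * sizeof(int));  /* preallocated, like nskb */
        int primary_used = 0, overflow_used = 0;

        if (!overflow)
                return 1;

        for (; nr_slots > 0 && primary_used < MAX_PRIMARY; nr_slots--)
                primary_used++;

        if (nr_slots > 0) {
                for (; overflow_used < nr_slots; overflow_used++)
                        overflow[overflow_used] = overflow_used;
        } else {
                free(overflow);                   /* spare buffer not needed */
                overflow = NULL;
        }

        printf("primary=%d overflow=%d\n", primary_used, overflow_used);
        free(overflow);
        return 0;
}
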
index 37b6fa7..f3a01b7 100644 (file)
@@ -3933,6 +3933,12 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
         */
        nvme_mpath_clear_ctrl_paths(ctrl);
 
+       /*
+        * Unquiesce io queues so any pending IO won't hang, especially
+        * those submitted from scan work
+        */
+       nvme_unquiesce_io_queues(ctrl);
+
        /* prevent racing with ns scanning */
        flush_work(&ctrl->scan_work);
 
@@ -3942,10 +3948,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
         * removing the namespaces' disks; fail all the queues now to avoid
         * potentially having to clean up the failed sync later.
         */
-       if (ctrl->state == NVME_CTRL_DEAD) {
+       if (ctrl->state == NVME_CTRL_DEAD)
                nvme_mark_namespaces_dead(ctrl);
-               nvme_unquiesce_io_queues(ctrl);
-       }
 
        /* this is a no-op when called from the controller reset handler */
        nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
index 5c3250f..d39f321 100644 (file)
@@ -786,11 +786,9 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
        if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
                return 0;
 
-       rcu_read_lock();
        req = READ_ONCE(ioucmd->cookie);
        if (req && blk_rq_is_poll(req))
                ret = blk_rq_poll(req, iob, poll_flags);
-       rcu_read_unlock();
        return ret;
 }
 #ifdef CONFIG_NVME_MULTIPATH
index baf69af..2f57da1 100644 (file)
@@ -3402,7 +3402,8 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_DEVICE(0x1d97, 0x2263),   /* SPCC */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(0x144d, 0xa80b),   /* Samsung PM9B1 256G and 512G */
-               .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+               .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES |
+                               NVME_QUIRK_BOGUS_NID, },
        { PCI_DEVICE(0x144d, 0xa809),   /* Samsung MZALQ256HBJD 256G */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(0x144d, 0xa802),   /* Samsung SM953 */
index d433b2e..337a624 100644 (file)
@@ -883,6 +883,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
                goto out_cleanup_tagset;
 
        if (!new) {
+               nvme_start_freeze(&ctrl->ctrl);
                nvme_unquiesce_io_queues(&ctrl->ctrl);
                if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
                        /*
@@ -891,6 +892,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
                         * to be safe.
                         */
                        ret = -ENODEV;
+                       nvme_unfreeze(&ctrl->ctrl);
                        goto out_wait_freeze_timed_out;
                }
                blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
@@ -940,7 +942,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
        if (ctrl->ctrl.queue_count > 1) {
-               nvme_start_freeze(&ctrl->ctrl);
                nvme_quiesce_io_queues(&ctrl->ctrl);
                nvme_sync_io_queues(&ctrl->ctrl);
                nvme_rdma_stop_io_queues(ctrl);
index 9ce417c..5b332d9 100644 (file)
@@ -1868,6 +1868,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
                goto out_cleanup_connect_q;
 
        if (!new) {
+               nvme_start_freeze(ctrl);
                nvme_unquiesce_io_queues(ctrl);
                if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
                        /*
@@ -1876,6 +1877,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
                         * to be safe.
                         */
                        ret = -ENODEV;
+                       nvme_unfreeze(ctrl);
                        goto out_wait_freeze_timed_out;
                }
                blk_mq_update_nr_hw_queues(ctrl->tagset,
@@ -1980,7 +1982,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
        if (ctrl->queue_count <= 1)
                return;
        nvme_quiesce_admin_queue(ctrl);
-       nvme_start_freeze(ctrl);
        nvme_quiesce_io_queues(ctrl);
        nvme_sync_io_queues(ctrl);
        nvme_tcp_stop_io_queues(ctrl);
index bf3405f..8b1dcd5 100644 (file)
@@ -121,6 +121,8 @@ module_param(sba_reserve_agpgart, int, 0444);
 MODULE_PARM_DESC(sba_reserve_agpgart, "Reserve half of IO pdir as AGPGART");
 #endif
 
+struct proc_dir_entry *proc_runway_root __ro_after_init;
+struct proc_dir_entry *proc_mckinley_root __ro_after_init;
 
 /************************************
 ** SBA register read and write support
@@ -1968,11 +1970,15 @@ static int __init sba_driver_callback(struct parisc_device *dev)
 #ifdef CONFIG_PROC_FS
        switch (dev->id.hversion) {
        case PLUTO_MCKINLEY_PORT:
+               if (!proc_mckinley_root)
+                       proc_mckinley_root = proc_mkdir("bus/mckinley", NULL);
                root = proc_mckinley_root;
                break;
        case ASTRO_RUNWAY_PORT:
        case IKE_MERCED_PORT:
        default:
+               if (!proc_runway_root)
+                       proc_runway_root = proc_mkdir("bus/runway", NULL);
                root = proc_runway_root;
                break;
        }
index 5bc81cc..46b252b 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/pci.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
+#include <linux/of.h>
 #include <linux/proc_fs.h>
 #include <linux/slab.h>
 
@@ -332,6 +333,7 @@ void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
  */
 void pci_bus_add_device(struct pci_dev *dev)
 {
+       struct device_node *dn = dev->dev.of_node;
        int retval;
 
        /*
@@ -344,7 +346,7 @@ void pci_bus_add_device(struct pci_dev *dev)
        pci_proc_attach_device(dev);
        pci_bridge_d3_update(dev);
 
-       dev->match_driver = true;
+       dev->match_driver = !dn || of_device_is_available(dn);
        retval = device_attach(&dev->dev);
        if (retval < 0 && retval != -EPROBE_DEFER)
                pci_warn(dev, "device attach failed (%d)\n", retval);
index 8d49bad..0859be8 100644 (file)
@@ -179,7 +179,6 @@ config PCI_MVEBU
        depends on MVEBU_MBUS
        depends on ARM
        depends on OF
-       depends on BROKEN
        select PCI_BRIDGE_EMUL
        help
         Add support for Marvell EBU PCIe controller. This PCIe controller
index cf61733..9952057 100644 (file)
@@ -485,20 +485,15 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
        if (ret)
                goto err_remove_edma;
 
-       if (dw_pcie_link_up(pci)) {
-               dw_pcie_print_link_status(pci);
-       } else {
+       if (!dw_pcie_link_up(pci)) {
                ret = dw_pcie_start_link(pci);
                if (ret)
                        goto err_remove_edma;
-
-               if (pci->ops && pci->ops->start_link) {
-                       ret = dw_pcie_wait_for_link(pci);
-                       if (ret)
-                               goto err_stop_link;
-               }
        }
 
+       /* Ignore errors, the link may come up later */
+       dw_pcie_wait_for_link(pci);
+
        bridge->sysdata = pp;
 
        ret = pci_host_probe(bridge);
index c87848c..1f2ee71 100644 (file)
@@ -644,20 +644,9 @@ void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index)
        dw_pcie_writel_atu(pci, dir, index, PCIE_ATU_REGION_CTRL2, 0);
 }
 
-void dw_pcie_print_link_status(struct dw_pcie *pci)
-{
-       u32 offset, val;
-
-       offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
-       val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
-
-       dev_info(pci->dev, "PCIe Gen.%u x%u link up\n",
-                FIELD_GET(PCI_EXP_LNKSTA_CLS, val),
-                FIELD_GET(PCI_EXP_LNKSTA_NLW, val));
-}
-
 int dw_pcie_wait_for_link(struct dw_pcie *pci)
 {
+       u32 offset, val;
        int retries;
 
        /* Check if the link is up or not */
@@ -673,7 +662,12 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci)
                return -ETIMEDOUT;
        }
 
-       dw_pcie_print_link_status(pci);
+       offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+       val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
+
+       dev_info(pci->dev, "PCIe Gen.%u x%u link up\n",
+                FIELD_GET(PCI_EXP_LNKSTA_CLS, val),
+                FIELD_GET(PCI_EXP_LNKSTA_NLW, val));
 
        return 0;
 }
index 6156606..79713ce 100644 (file)
@@ -429,7 +429,6 @@ void dw_pcie_setup(struct dw_pcie *pci);
 void dw_pcie_iatu_detect(struct dw_pcie *pci);
 int dw_pcie_edma_detect(struct dw_pcie *pci);
 void dw_pcie_edma_remove(struct dw_pcie *pci);
-void dw_pcie_print_link_status(struct dw_pcie *pci);
 
 static inline void dw_pcie_writel_dbi(struct dw_pcie *pci, u32 reg, u32 val)
 {
index 328d1e4..6011297 100644 (file)
@@ -498,6 +498,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
                                acpiphp_native_scan_bridge(dev);
                }
        } else {
+               LIST_HEAD(add_list);
                int max, pass;
 
                acpiphp_rescan_slot(slot);
@@ -511,10 +512,15 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
                                if (pass && dev->subordinate) {
                                        check_hotplug_bridge(slot, dev);
                                        pcibios_resource_survey_bus(dev->subordinate);
+                                       if (pci_is_root_bus(bus))
+                                               __pci_bus_size_bridges(dev->subordinate, &add_list);
                                }
                        }
                }
-               pci_assign_unassigned_bridge_resources(bus->self);
+               if (pci_is_root_bus(bus))
+                       __pci_bus_assign_resources(bus, &add_list, NULL);
+               else
+                       pci_assign_unassigned_bridge_resources(bus->self);
        }
 
        acpiphp_sanitize_bus(bus);
index e51219f..3c158b1 100644 (file)
@@ -34,11 +34,6 @@ int pci_set_of_node(struct pci_dev *dev)
        if (!node)
                return 0;
 
-       if (!of_device_is_available(node)) {
-               of_node_put(node);
-               return -ENODEV;
-       }
-
        device_set_node(&dev->dev, of_fwnode_handle(node));
        return 0;
 }
index 1bf3c44..b43fa8b 100644 (file)
@@ -1578,7 +1578,7 @@ const struct file_operations gfs2_file_fops = {
        .fsync          = gfs2_fsync,
        .lock           = gfs2_lock,
        .flock          = gfs2_flock,
-       .splice_read    = filemap_splice_read,
+       .splice_read    = copy_splice_read,
        .splice_write   = gfs2_file_splice_write,
        .setlease       = simple_nosetlease,
        .fallocate      = gfs2_fallocate,
@@ -1609,7 +1609,7 @@ const struct file_operations gfs2_file_fops_nolock = {
        .open           = gfs2_open,
        .release        = gfs2_release,
        .fsync          = gfs2_fsync,
-       .splice_read    = filemap_splice_read,
+       .splice_read    = copy_splice_read,
        .splice_write   = gfs2_file_splice_write,
        .setlease       = generic_setlease,
        .fallocate      = gfs2_fallocate,
index ec16312..7e835be 100644 (file)
@@ -230,9 +230,11 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
 {
 
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+       struct super_block *sb = sdp->sd_vfs;
        struct gfs2_bufdata *bd;
        struct gfs2_meta_header *mh;
        struct gfs2_trans *tr = current->journal_info;
+       bool withdraw = false;
 
        lock_buffer(bh);
        if (buffer_pinned(bh)) {
@@ -266,13 +268,15 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
                       (unsigned long long)bd->bd_bh->b_blocknr);
                BUG();
        }
-       if (unlikely(test_bit(SDF_FROZEN, &sdp->sd_flags))) {
-               fs_info(sdp, "GFS2:adding buf while frozen\n");
-               gfs2_assert_withdraw(sdp, 0);
-       }
        if (unlikely(gfs2_withdrawn(sdp))) {
                fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n",
                        (unsigned long long)bd->bd_bh->b_blocknr);
+               goto out_unlock;
+       }
+       if (unlikely(sb->s_writers.frozen == SB_FREEZE_COMPLETE)) {
+               fs_info(sdp, "GFS2:adding buf while frozen\n");
+               withdraw = true;
+               goto out_unlock;
        }
        gfs2_pin(sdp, bd->bd_bh);
        mh->__pad0 = cpu_to_be64(0);
@@ -281,6 +285,8 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
        tr->tr_num_buf_new++;
 out_unlock:
        gfs2_log_unlock(sdp);
+       if (withdraw)
+               gfs2_assert_withdraw(sdp, 0);
 out:
        unlock_buffer(bh);
 }
index a8ce522..35bc793 100644 (file)
@@ -1101,9 +1101,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
 
 int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
 {
+       struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
        struct buffer_head *ibh;
        int err;
 
+       /*
+        * Do not dirty inodes after the log writer has been detached
+        * and its nilfs_root struct has been freed.
+        */
+       if (unlikely(nilfs_purging(nilfs)))
+               return 0;
+
        err = nilfs_load_inode_block(inode, &ibh);
        if (unlikely(err)) {
                nilfs_warn(inode->i_sb,
index c255302..581691e 100644 (file)
@@ -2845,6 +2845,7 @@ void nilfs_detach_log_writer(struct super_block *sb)
                nilfs_segctor_destroy(nilfs->ns_writer);
                nilfs->ns_writer = NULL;
        }
+       set_nilfs_purging(nilfs);
 
        /* Force to free the list of dirty files */
        spin_lock(&nilfs->ns_inode_lock);
@@ -2857,4 +2858,5 @@ void nilfs_detach_log_writer(struct super_block *sb)
        up_write(&nilfs->ns_segctor_sem);
 
        nilfs_dispose_list(nilfs, &garbage_list, 1);
+       clear_nilfs_purging(nilfs);
 }
index 47c7dfb..cd4ae1b 100644 (file)
@@ -29,6 +29,7 @@ enum {
        THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
        THE_NILFS_GC_RUNNING,   /* gc process is running */
        THE_NILFS_SB_DIRTY,     /* super block is dirty */
+       THE_NILFS_PURGING,      /* disposing dirty files for cleanup */
 };
 
 /**
@@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init)
 THE_NILFS_FNS(DISCONTINUED, discontinued)
 THE_NILFS_FNS(GC_RUNNING, gc_running)
 THE_NILFS_FNS(SB_DIRTY, sb_dirty)
+THE_NILFS_FNS(PURGING, purging)
 
 /*
  * Mount option operations
index 9cb32e1..23fc24d 100644 (file)
@@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
 
 static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
+       struct file *file = iocb->ki_filp;
+       char *buf = file->private_data;
        loff_t *fpos = &iocb->ki_pos;
        size_t phdrs_offset, notes_offset, data_offset;
        size_t page_offline_frozen = 1;
@@ -555,10 +557,21 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
                case KCORE_VMEMMAP:
                case KCORE_TEXT:
                        /*
-                        * We use _copy_to_iter() to bypass usermode hardening
-                        * which would otherwise prevent this operation.
+                        * Sadly we must use a bounce buffer here to be able to
+                        * make use of copy_from_kernel_nofault(), as these
+                        * memory regions might not always be mapped on all
+                        * architectures.
                         */
-                       if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
+                       if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+                               if (iov_iter_zero(tsz, iter) != tsz) {
+                                       ret = -EFAULT;
+                                       goto out;
+                               }
+                       /*
+                        * We know the bounce buffer is safe to copy from, so
+                        * use _copy_to_iter() directly.
+                        */
+                       } else if (_copy_to_iter(buf, tsz, iter) != tsz) {
                                ret = -EFAULT;
                                goto out;
                        }
@@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
        if (ret)
                return ret;
 
+       filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!filp->private_data)
+               return -ENOMEM;
+
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
@@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
        return 0;
 }
 
+static int release_kcore(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);
+       return 0;
+}
+
 static const struct proc_ops kcore_proc_ops = {
        .proc_read_iter = read_kcore_iter,
        .proc_open      = open_kcore,
+       .proc_release   = release_kcore,
        .proc_lseek     = default_llseek,
 };
 
index 33b7e6c..e881df1 100644 (file)
@@ -380,13 +380,13 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
        }
 
        if (smb2_req_struct_sizes[command] != pdu->StructureSize2) {
-               if (command == SMB2_OPLOCK_BREAK_HE &&
-                   le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
-                   le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
+               if (!(command == SMB2_OPLOCK_BREAK_HE &&
+                   (le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_20 ||
+                   le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_21))) {
                        /* special case for SMB2.1 lease break message */
                        ksmbd_debug(SMB,
-                                   "Illegal request size %d for oplock break\n",
-                                   le16_to_cpu(pdu->StructureSize2));
+                               "Illegal request size %u for command %d\n",
+                               le16_to_cpu(pdu->StructureSize2), command);
                        return 1;
                }
        }
index 9849d74..7cc1b0c 100644 (file)
@@ -2324,9 +2324,16 @@ next:
                        break;
                buf_len -= next;
                eabuf = (struct smb2_ea_info *)((char *)eabuf + next);
-               if (next < (u32)eabuf->EaNameLength + le16_to_cpu(eabuf->EaValueLength))
+               if (buf_len < sizeof(struct smb2_ea_info)) {
+                       rc = -EINVAL;
                        break;
+               }
 
+               if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength +
+                               le16_to_cpu(eabuf->EaValueLength)) {
+                       rc = -EINVAL;
+                       break;
+               }
        } while (next != 0);
 
        kfree(attr_name);
index aca8290..069a019 100644 (file)
@@ -68,9 +68,9 @@ struct shfl_string {
 
        /** UTF-8 or UTF-16 string. Nul terminated. */
        union {
-               u8 utf8[2];
-               u16 utf16[1];
-               u16 ucs2[1]; /* misnomer, use utf16. */
+               u8 legacy_padding[2];
+               DECLARE_FLEX_ARRAY(u8, utf8);
+               DECLARE_FLEX_ARRAY(u16, utf16);
        } string;
 };
 VMMDEV_ASSERT_SIZE(shfl_string, 6);
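
Switching shfl_string to DECLARE_FLEX_ARRAY() members lets the fortified string and memcpy helpers see a real flexible array instead of a fake bound of one or two bytes. A minimal standalone equivalent of a flexible array inside a union, written out by hand roughly the way the kernel macro expands (the empty struct is the same GNU extension the macro relies on):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct shared_string {
        unsigned short size;
        union {
                unsigned char legacy_padding[2];
                struct {
                        struct { } __empty;  /* keeps the flexible array
                                              * legal, as the macro does */
                        unsigned char utf8[];
                };
        };
};

int main(void)
{
        const char *msg = "hello";
        size_t len = strlen(msg) + 1;
        struct shared_string *s = malloc(sizeof(*s) + len);

        if (!s)
                return 1;
        s->size = (unsigned short)len;
        memcpy(s->utf8, msg, len);      /* bound is the allocation, not [2] */
        printf("%s (%u bytes)\n", (char *)s->utf8, (unsigned int)s->size);
        free(s);
        return 0;
}
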
index c4f5b52..11984ed 100644 (file)
@@ -791,7 +791,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
 static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
 {
        bio->bi_opf |= REQ_POLLED;
-       if (!is_sync_kiocb(kiocb))
+       if (kiocb->ki_flags & IOCB_NOWAIT)
                bio->bi_opf |= REQ_NOWAIT;
 }
 
index ed44a99..87d94be 100644 (file)
@@ -969,7 +969,6 @@ struct blk_plug {
 
        bool multiple_queues;
        bool has_elevator;
-       bool nowait;
 
        struct list_head cb_list; /* md requires an unplug callback */
 };
index 6e6e57e..23ac87b 100644 (file)
@@ -70,6 +70,8 @@ extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
                                        char *buf);
 extern ssize_t cpu_show_retbleed(struct device *dev,
                                 struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev,
+                                            struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
index 054d791..c163751 100644 (file)
@@ -62,6 +62,7 @@ struct sk_psock_progs {
 
 enum sk_psock_state_bits {
        SK_PSOCK_TX_ENABLED,
+       SK_PSOCK_RX_STRP_ENABLED,
 };
 
 struct sk_psock_link {
index 6a1e8f1..4ee9d13 100644 (file)
@@ -283,6 +283,7 @@ enum tpm_chip_flags {
        TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED    = BIT(6),
        TPM_CHIP_FLAG_FIRMWARE_UPGRADE          = BIT(7),
        TPM_CHIP_FLAG_SUSPENDED                 = BIT(8),
+       TPM_CHIP_FLAG_HWRNG_DISABLED            = BIT(9),
 };
 
 #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
index 7c7d03a..d6fa7c8 100644 (file)
@@ -562,6 +562,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
        if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
                return NULL;
 
+       if (iftype == NL80211_IFTYPE_AP_VLAN)
+               iftype = NL80211_IFTYPE_AP;
+
        for (i = 0; i < sband->n_iftype_data; i++)  {
                const struct ieee80211_sband_iftype_data *data =
                        &sband->iftype_data[i];
index 640441a..3587085 100644 (file)
@@ -512,6 +512,7 @@ struct nft_set_elem_expr {
  *
  *     @list: table set list node
  *     @bindings: list of set bindings
+ *     @refs: internal refcounting for async set destruction
  *     @table: table this set belongs to
  *     @net: netnamespace this set belongs to
  *     @name: name of the set
@@ -541,6 +542,7 @@ struct nft_set_elem_expr {
 struct nft_set {
        struct list_head                list;
        struct list_head                bindings;
+       refcount_t                      refs;
        struct nft_table                *table;
        possible_net_t                  net;
        char                            *name;
@@ -562,7 +564,8 @@ struct nft_set {
        struct list_head                pending_update;
        /* runtime data below here */
        const struct nft_set_ops        *ops ____cacheline_aligned;
-       u16                             flags:14,
+       u16                             flags:13,
+                                       dead:1,
                                        genmask:2;
        u8                              klen;
        u8                              dlen;
@@ -596,7 +599,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
 
 struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
                                            const struct nft_set *set);
-void *nft_set_catchall_gc(const struct nft_set *set);
 
 static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
 {
@@ -813,62 +815,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
                                const struct nft_set *set, void *elem);
 
-/**
- *     struct nft_set_gc_batch_head - nf_tables set garbage collection batch
- *
- *     @rcu: rcu head
- *     @set: set the elements belong to
- *     @cnt: count of elements
- */
-struct nft_set_gc_batch_head {
-       struct rcu_head                 rcu;
-       const struct nft_set            *set;
-       unsigned int                    cnt;
-};
-
-#define NFT_SET_GC_BATCH_SIZE  ((PAGE_SIZE -                             \
-                                 sizeof(struct nft_set_gc_batch_head)) / \
-                                sizeof(void *))
-
-/**
- *     struct nft_set_gc_batch - nf_tables set garbage collection batch
- *
- *     @head: GC batch head
- *     @elems: garbage collection elements
- */
-struct nft_set_gc_batch {
-       struct nft_set_gc_batch_head    head;
-       void                            *elems[NFT_SET_GC_BATCH_SIZE];
-};
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
-                                               gfp_t gfp);
-void nft_set_gc_batch_release(struct rcu_head *rcu);
-
-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
-{
-       if (gcb != NULL)
-               call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
-}
-
-static inline struct nft_set_gc_batch *
-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
-                      gfp_t gfp)
-{
-       if (gcb != NULL) {
-               if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
-                       return gcb;
-               nft_set_gc_batch_complete(gcb);
-       }
-       return nft_set_gc_batch_alloc(set, gfp);
-}
-
-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
-                                       void *elem)
-{
-       gcb->elems[gcb->head.cnt++] = elem;
-}
-
 struct nft_expr_ops;
 /**
  *     struct nft_expr_type - nf_tables expression type
@@ -1557,39 +1503,30 @@ static inline void nft_set_elem_change_active(const struct net *net,
 
 #endif /* IS_ENABLED(CONFIG_NF_TABLES) */
 
-/*
- * We use a free bit in the genmask field to indicate the element
- * is busy, meaning it is currently being processed either by
- * the netlink API or GC.
- *
- * Even though the genmask is only a single byte wide, this works
- * because the extension structure if fully constant once initialized,
- * so there are no non-atomic write accesses unless it is already
- * marked busy.
- */
-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
 
 #if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT  2
+#define NFT_SET_ELEM_DEAD_BIT  2
 #elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT  (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#define NFT_SET_ELEM_DEAD_BIT  (BITS_PER_LONG - BITS_PER_BYTE + 2)
 #else
 #error
 #endif
 
-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+static inline void nft_set_elem_dead(struct nft_set_ext *ext)
 {
        unsigned long *word = (unsigned long *)ext;
 
        BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
-       return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+       set_bit(NFT_SET_ELEM_DEAD_BIT, word);
 }
 
-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
 {
        unsigned long *word = (unsigned long *)ext;
 
-       clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+       BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+       return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
 }
 
 /**
@@ -1732,6 +1669,38 @@ struct nft_trans_flowtable {
 #define nft_trans_flowtable_flags(trans)       \
        (((struct nft_trans_flowtable *)trans->data)->flags)
 
+#define NFT_TRANS_GC_BATCHCOUNT        256
+
+struct nft_trans_gc {
+       struct list_head        list;
+       struct net              *net;
+       struct nft_set          *set;
+       u32                     seq;
+       u8                      count;
+       void                    *priv[NFT_TRANS_GC_BATCHCOUNT];
+       struct rcu_head         rcu;
+};
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+                                       unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_destroy(struct nft_trans_gc *trans);
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+                                             unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+                                          unsigned int gc_seq);
+
+void nft_setelem_data_deactivate(const struct net *net,
+                                const struct nft_set *set,
+                                struct nft_set_elem *elem);
+
 int __init nft_chain_filter_init(void);
 void nft_chain_filter_fini(void);
 
@@ -1758,6 +1727,7 @@ struct nftables_pernet {
        struct mutex            commit_mutex;
        u64                     table_handle;
        unsigned int            base_seq;
+       unsigned int            gc_seq;
 };
 
 extern unsigned int nf_tables_net_id;
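
The declarations above replace the old fixed-size nft_set_gc_batch with reference-counted, per-set GC transactions that are validated against gc_seq. As a rough orientation only, the intended call pattern for the asynchronous path (mirroring what the rhash backend does further down in this series) is sketched below; it is not compilable on its own, and for_each_expired_element() is a made-up placeholder for the backend's own walk over its elements.

/* Hedged sketch of the async GC batching API; not standalone kernel code. */
static void example_set_gc(struct nft_set *set, struct nftables_pernet *nft_net)
{
	unsigned int gc_seq = READ_ONCE(nft_net->gc_seq);
	struct nft_trans_gc *gc;
	struct nft_set_ext *ext;
	void *priv;

	gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
	if (!gc)
		return;

	for_each_expired_element(set, ext, priv) {	/* placeholder iterator */
		nft_set_elem_dead(ext);			/* mark dead, do not free yet */
		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
		if (!gc)
			return;		/* full batch was queued, fresh alloc failed */
		nft_trans_gc_elem_add(gc, priv);
	}

	/* Also pick up expired catchall elements, then hand the batch over. */
	gc = nft_trans_gc_catchall(gc, gc_seq);
	if (gc)
		nft_trans_gc_queue_async_done(gc);
}
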
index bf06db8..7b1ddff 100644 (file)
@@ -381,6 +381,7 @@ TRACE_EVENT(tcp_cong_state_set,
                __field(const void *, skaddr)
                __field(__u16, sport)
                __field(__u16, dport)
+               __field(__u16, family)
                __array(__u8, saddr, 4)
                __array(__u8, daddr, 4)
                __array(__u8, saddr_v6, 16)
@@ -396,6 +397,7 @@ TRACE_EVENT(tcp_cong_state_set,
 
                __entry->sport = ntohs(inet->inet_sport);
                __entry->dport = ntohs(inet->inet_dport);
+               __entry->family = sk->sk_family;
 
                p32 = (__be32 *) __entry->saddr;
                *p32 = inet->inet_saddr;
@@ -409,7 +411,8 @@ TRACE_EVENT(tcp_cong_state_set,
                __entry->cong_state = ca_state;
        ),
 
-       TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+       TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+                 show_family_name(__entry->family),
                  __entry->sport, __entry->dport,
                  __entry->saddr, __entry->daddr,
                  __entry->saddr_v6, __entry->daddr_v6,
index f4591b9..93db3e4 100644 (file)
@@ -3470,6 +3470,8 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
         * - use the kernel virtual address of the shared io_uring context
         *   (instead of the userspace-provided address, which has to be 0UL
         *   anyway).
+        * - use the same pgoff which the get_unmapped_area() uses to
+        *   calculate the page colouring.
         * For architectures without such aliasing requirements, the
         * architecture will return any suitable mapping because addr is 0.
         */
@@ -3478,6 +3480,7 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
        pgoff = 0;      /* has been translated to ptr above */
 #ifdef SHM_COLOUR
        addr = (uintptr_t) ptr;
+       pgoff = addr >> PAGE_SHIFT;
 #else
        addr = 0UL;
 #endif
index 10ca57f..e3fae26 100644 (file)
@@ -35,9 +35,11 @@ static bool io_openat_force_async(struct io_open *open)
 {
        /*
         * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
-        * it'll always -EAGAIN
+        * it'll always -EAGAIN. Note that we test for __O_TMPFILE because
+        * O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force
+        * async for.
         */
-       return open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE);
+       return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE);
 }
 
 static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
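
For context on the mask change above: in the generic kernel UAPI, O_TMPFILE is defined as (__O_TMPFILE | O_DIRECTORY), so testing against the full O_TMPFILE mask also matches a plain O_DIRECTORY open. The small standalone program below illustrates this with the asm-generic octal values current at the time of this merge; these constants are architecture dependent, so treat them as illustrative only.

#include <stdio.h>

/* Illustrative asm-generic UAPI values; several architectures use different ones. */
#define O_DIRECTORY	00200000
#define __O_TMPFILE	020000000
#define O_TMPFILE	(__O_TMPFILE | O_DIRECTORY)

int main(void)
{
	unsigned int dir_open = O_DIRECTORY;	/* a plain directory open */

	printf("dir_open & O_TMPFILE   = %#o\n", dir_open & O_TMPFILE);	/* non-zero */
	printf("dir_open & __O_TMPFILE = %#o\n", dir_open & __O_TMPFILE);	/* zero */
	return 0;
}
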
index e1b4bfa..2b4a946 100644 (file)
@@ -1166,7 +1166,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
        int error;
 
        if (!hibernation_available())
-               return 0;
+               return n;
 
        if (len && buf[len-1] == '\n')
                len--;
index 02a8f40..800b420 100644 (file)
@@ -52,6 +52,7 @@
 #include <linux/sched/debug.h>
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
+#include <linux/delay.h>
 
 #include "workqueue_internal.h"
 
@@ -338,8 +339,10 @@ static cpumask_var_t *wq_numa_possible_cpumask;
  * Per-cpu work items which run for longer than the following threshold are
  * automatically considered CPU intensive and excluded from concurrency
  * management to prevent them from noticeably delaying other per-cpu work items.
+ * ULONG_MAX indicates that the user hasn't overridden it with a boot parameter.
+ * The actual value is initialized in wq_cpu_intensive_thresh_init().
  */
-static unsigned long wq_cpu_intensive_thresh_us = 10000;
+static unsigned long wq_cpu_intensive_thresh_us = ULONG_MAX;
 module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, 0644);
 
 static bool wq_disable_numa;
@@ -6513,6 +6516,42 @@ void __init workqueue_init_early(void)
               !system_freezable_power_efficient_wq);
 }
 
+static void __init wq_cpu_intensive_thresh_init(void)
+{
+       unsigned long thresh;
+       unsigned long bogo;
+
+       /* if the user set it to a specific value, keep it */
+       if (wq_cpu_intensive_thresh_us != ULONG_MAX)
+               return;
+
+       /*
+        * The default of 10ms is derived from the fact that most modern (as of
+        * 2023) processors can do a lot in 10ms and that it's just below what
+        * most consider human-perceivable. However, the kernel also runs on
+        * much slower CPUs, including microcontrollers, where the threshold is
+        * way too low.
+        *
+        * Let's scale the threshold up to 1 second if BogoMIPS is below 4000.
+        * This is by no means accurate but it doesn't have to be. The mechanism
+        * is still useful even when the threshold is fully scaled up. Also, as
+        * the reports would usually be applicable to everyone, the fact that
+        * some machines operate with longer thresholds won't significantly
+        * diminish their usefulness.
+        */
+       thresh = 10 * USEC_PER_MSEC;
+
+       /* see init/calibrate.c for lpj -> BogoMIPS calculation */
+       bogo = max_t(unsigned long, loops_per_jiffy / 500000 * HZ, 1);
+       if (bogo < 4000)
+               thresh = min_t(unsigned long, thresh * 4000 / bogo, USEC_PER_SEC);
+
+       pr_debug("wq_cpu_intensive_thresh: lpj=%lu BogoMIPS=%lu thresh_us=%lu\n",
+                loops_per_jiffy, bogo, thresh);
+
+       wq_cpu_intensive_thresh_us = thresh;
+}
+
 /**
  * workqueue_init - bring workqueue subsystem fully online
  *
@@ -6528,6 +6567,8 @@ void __init workqueue_init(void)
        struct worker_pool *pool;
        int cpu, bkt;
 
+       wq_cpu_intensive_thresh_init();
+
        /*
         * It'd be simpler to initialize NUMA in workqueue_init_early() but
         * CPU to node mapping may not be available that early on some
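
To make the scaling in wq_cpu_intensive_thresh_init() concrete, the standalone model below reproduces the arithmetic with made-up loops_per_jiffy and HZ inputs; only the formula is taken from the code above.

#include <stdio.h>

#define USEC_PER_MSEC	1000UL
#define USEC_PER_SEC	1000000UL

/* Mirrors wq_cpu_intensive_thresh_init(): BogoMIPS ~= loops_per_jiffy / 500000 * HZ. */
static unsigned long thresh_us(unsigned long loops_per_jiffy, unsigned long hz)
{
	unsigned long thresh = 10 * USEC_PER_MSEC;
	unsigned long bogo = loops_per_jiffy / 500000 * hz;

	if (bogo < 1)
		bogo = 1;
	if (bogo < 4000) {
		thresh = thresh * 4000 / bogo;
		if (thresh > USEC_PER_SEC)
			thresh = USEC_PER_SEC;
	}
	return thresh;
}

int main(void)
{
	/* ~4800 BogoMIPS, e.g. a desktop CPU: the 10ms default is kept. */
	printf("fast: %lu us\n", thresh_us(24000000UL, 100));
	/* ~200 BogoMIPS, e.g. a slow embedded CPU: scaled up to 200ms. */
	printf("slow: %lu us\n", thresh_us(1000000UL, 100));
	return 0;
}
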
index fbc89ba..d679851 100644 (file)
@@ -1200,7 +1200,7 @@ config WQ_CPU_INTENSIVE_REPORT
        help
          Say Y here to enable reporting of concurrency-managed per-cpu work
          items that hog CPUs for longer than
-         workqueue.cpu_intensive_threshold_us. Workqueue automatically
+         workqueue.cpu_intensive_thresh_us. Workqueue automatically
          detects and excludes them from concurrency management to prevent
          them from stalling other per-cpu work items. Occasional
          triggering may not necessarily indicate a problem. Repeated
index e86231a..c65566b 100644 (file)
@@ -1148,7 +1148,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter,
 
 failed:
        while (sgtable->nents > sgtable->orig_nents)
-               put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
+               unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
        return res;
 }
 
index dbc9f86..eacca27 100644 (file)
@@ -912,11 +912,12 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
                /*
                 * Check if the pageblock has already been marked skipped.
-                * Only the aligned PFN is checked as the caller isolates
+                * Only the first PFN is checked as the caller isolates
                 * COMPACT_CLUSTER_MAX at a time so the second call must
                 * not falsely conclude that the block should be skipped.
                 */
-               if (!valid_page && pageblock_aligned(low_pfn)) {
+               if (!valid_page && (pageblock_aligned(low_pfn) ||
+                                   low_pfn == cc->zone->zone_start_pfn)) {
                        if (!isolation_suitable(cc, page)) {
                                low_pfn = end_pfn;
                                folio = NULL;
@@ -2002,7 +2003,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
                 * before making it "skip" so other compaction instances do
                 * not scan the same block.
                 */
-               if (pageblock_aligned(low_pfn) &&
+               if ((pageblock_aligned(low_pfn) ||
+                    low_pfn == cc->zone->zone_start_pfn) &&
                    !fast_find_block && !isolation_suitable(cc, page))
                        continue;
 
index 91cff7f..eb95809 100644 (file)
@@ -273,6 +273,7 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type,
                return NULL;
        filter->type = type;
        filter->matching = matching;
+       INIT_LIST_HEAD(&filter->list);
        return filter;
 }
 
index 64a3239..6da626b 100644 (file)
@@ -1579,9 +1579,37 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
                                                unsigned int order) { }
 #endif
 
+static inline void __clear_hugetlb_destructor(struct hstate *h,
+                                               struct folio *folio)
+{
+       lockdep_assert_held(&hugetlb_lock);
+
+       /*
+        * Very subtle
+        *
+        * For non-gigantic pages set the destructor to the normal compound
+        * page dtor.  This is needed in case someone takes an additional
+        * temporary ref to the page, and freeing is delayed until they drop
+        * their reference.
+        *
+        * For gigantic pages set the destructor to the null dtor.  This
+        * destructor will never be called.  Before freeing the gigantic
+        * page destroy_compound_gigantic_folio will turn the folio into a
+        * simple group of pages.  After this the destructor does not
+        * apply.
+        *
+        */
+       if (hstate_is_gigantic(h))
+               folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
+       else
+               folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
+}
+
 /*
- * Remove hugetlb folio from lists, and update dtor so that the folio appears
- * as just a compound page.
+ * Remove hugetlb folio from lists.
+ * If vmemmap exists for the folio, update dtor so that the folio appears
+ * as just a compound page.  Otherwise, wait until after allocating vmemmap
+ * to update dtor.
  *
  * A reference is held on the folio, except in the case of demote.
  *
@@ -1612,31 +1640,19 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
        }
 
        /*
-        * Very subtle
-        *
-        * For non-gigantic pages set the destructor to the normal compound
-        * page dtor.  This is needed in case someone takes an additional
-        * temporary ref to the page, and freeing is delayed until they drop
-        * their reference.
-        *
-        * For gigantic pages set the destructor to the null dtor.  This
-        * destructor will never be called.  Before freeing the gigantic
-        * page destroy_compound_gigantic_folio will turn the folio into a
-        * simple group of pages.  After this the destructor does not
-        * apply.
-        *
-        * This handles the case where more than one ref is held when and
-        * after update_and_free_hugetlb_folio is called.
-        *
-        * In the case of demote we do not ref count the page as it will soon
-        * be turned into a page of smaller size.
+        * We can only clear the hugetlb destructor after allocating vmemmap
+        * pages.  Otherwise, someone (memory error handling) may try to write
+        * to tail struct pages.
+        */
+       if (!folio_test_hugetlb_vmemmap_optimized(folio))
+               __clear_hugetlb_destructor(h, folio);
+
+        /*
+         * In the case of demote we do not ref count the page as it will soon
+         * be turned into a page of smaller size.
         */
        if (!demote)
                folio_ref_unfreeze(folio, 1);
-       if (hstate_is_gigantic(h))
-               folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
-       else
-               folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
 
        h->nr_huge_pages--;
        h->nr_huge_pages_node[nid]--;
@@ -1705,6 +1721,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 {
        int i;
        struct page *subpage;
+       bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio);
 
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
                return;
@@ -1735,6 +1752,16 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
        if (unlikely(folio_test_hwpoison(folio)))
                folio_clear_hugetlb_hwpoison(folio);
 
+       /*
+        * If vmemmap pages were allocated above, then we need to clear the
+        * hugetlb destructor under the hugetlb lock.
+        */
+       if (clear_dtor) {
+               spin_lock_irq(&hugetlb_lock);
+               __clear_hugetlb_destructor(h, folio);
+               spin_unlock_irq(&hugetlb_lock);
+       }
+
        for (i = 0; i < pages_per_huge_page(h); i++) {
                subpage = folio_page(folio, i);
                subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
index ba26635..d20d766 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2784,6 +2784,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
                        anon_vma->root == vma->anon_vma->root) {
                return page;            /* still no need to copy it */
        }
+       if (PageHWPoison(page))
+               return ERR_PTR(-EHWPOISON);
        if (!PageUptodate(page))
                return page;            /* let do_swap_page report the error */
 
index ece5d48..9a28503 100644 (file)
@@ -2466,7 +2466,7 @@ int unpoison_memory(unsigned long pfn)
 {
        struct folio *folio;
        struct page *p;
-       int ret = -EBUSY;
+       int ret = -EBUSY, ghp;
        unsigned long count = 1;
        bool huge = false;
        static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -2499,6 +2499,13 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
+       if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+               goto unlock_mutex;
+
+       /*
+        * Note that folio->_mapcount is overloaded in SLAB, so the simple test
+        * in folio_mapped() has to be done after folio_test_slab() is checked.
+        */
        if (folio_mapped(folio)) {
                unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
                                 pfn, &unpoison_rs);
@@ -2511,32 +2518,28 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
-       if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
-               goto unlock_mutex;
-
-       ret = get_hwpoison_page(p, MF_UNPOISON);
-       if (!ret) {
+       ghp = get_hwpoison_page(p, MF_UNPOISON);
+       if (!ghp) {
                if (PageHuge(p)) {
                        huge = true;
                        count = folio_free_raw_hwp(folio, false);
-                       if (count == 0) {
-                               ret = -EBUSY;
+                       if (count == 0)
                                goto unlock_mutex;
-                       }
                }
                ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY;
-       } else if (ret < 0) {
-               if (ret == -EHWPOISON) {
+       } else if (ghp < 0) {
+               if (ghp == -EHWPOISON) {
                        ret = put_page_back_buddy(p) ? 0 : -EBUSY;
-               } else
+               } else {
+                       ret = ghp;
                        unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
                                         pfn, &unpoison_rs);
+               }
        } else {
                if (PageHuge(p)) {
                        huge = true;
                        count = folio_free_raw_hwp(folio, false);
                        if (count == 0) {
-                               ret = -EBUSY;
                                folio_put(folio);
                                goto unlock_mutex;
                        }
index 603b2f4..1ec1ef3 100644 (file)
@@ -5705,6 +5705,9 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
        if (mmap_read_lock_killable(mm))
                return 0;
 
+       /* Untag the address before looking up the VMA */
+       addr = untagged_addr_remote(mm, addr);
+
        /* Avoid triggering the temporary warning in __get_user_pages */
        if (!vma_lookup(mm, addr) && !expand_stack(mm, addr))
                return 0;
index 8e6dde6..b15112b 100644 (file)
@@ -1746,7 +1746,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        struct page *swapcache;
        spinlock_t *ptl;
        pte_t *pte, new_pte, old_pte;
-       bool hwposioned = false;
+       bool hwpoisoned = PageHWPoison(page);
        int ret = 1;
 
        swapcache = page;
@@ -1754,7 +1754,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        if (unlikely(!page))
                return -ENOMEM;
        else if (unlikely(PTR_ERR(page) == -EHWPOISON))
-               hwposioned = true;
+               hwpoisoned = true;
 
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),
@@ -1765,11 +1765,11 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 
        old_pte = ptep_get(pte);
 
-       if (unlikely(hwposioned || !PageUptodate(page))) {
+       if (unlikely(hwpoisoned || !PageUptodate(page))) {
                swp_entry_t swp_entry;
 
                dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
-               if (hwposioned) {
+               if (hwpoisoned) {
                        swp_entry = make_hwpoison_entry(swapcache);
                        page = swapcache;
                } else {
index 3f05797..32916d2 100644 (file)
@@ -1798,6 +1798,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
 
 static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 {
+       struct zs_pool *pool;
        struct zspage *zspage;
 
        /*
@@ -1807,9 +1808,10 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
        VM_BUG_ON_PAGE(PageIsolated(page), page);
 
        zspage = get_zspage(page);
-       migrate_write_lock(zspage);
+       pool = zspage->pool;
+       spin_lock(&pool->lock);
        inc_zspage_isolation(zspage);
-       migrate_write_unlock(zspage);
+       spin_unlock(&pool->lock);
 
        return true;
 }
@@ -1875,12 +1877,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
        kunmap_atomic(s_addr);
 
        replace_sub_page(class, zspage, newpage, page);
+       dec_zspage_isolation(zspage);
        /*
         * Since we complete the data copy and set up new zspage structure,
         * it's okay to release the pool's lock.
         */
        spin_unlock(&pool->lock);
-       dec_zspage_isolation(zspage);
        migrate_write_unlock(zspage);
 
        get_page(newpage);
@@ -1897,14 +1899,16 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 
 static void zs_page_putback(struct page *page)
 {
+       struct zs_pool *pool;
        struct zspage *zspage;
 
        VM_BUG_ON_PAGE(!PageIsolated(page), page);
 
        zspage = get_zspage(page);
-       migrate_write_lock(zspage);
+       pool = zspage->pool;
+       spin_lock(&pool->lock);
        dec_zspage_isolation(zspage);
-       migrate_write_unlock(zspage);
+       spin_unlock(&pool->lock);
 }
 
 static const struct movable_operations zsmalloc_mops = {
index e40aa3e..b366211 100644 (file)
@@ -384,8 +384,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
                        dev->name);
                vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
        }
-       if (event == NETDEV_DOWN &&
-           (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+       if (event == NETDEV_DOWN)
                vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
 
        vlan_info = rtnl_dereference(dev->vlan_info);
index 06ba0e5..28a5959 100644 (file)
@@ -4116,12 +4116,6 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
        if (unlikely(data_end > data_hard_end))
                return -EINVAL;
 
-       /* ALL drivers MUST init xdp->frame_sz, chicken check below */
-       if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
-               WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
-               return -EINVAL;
-       }
-
        if (unlikely(data_end < xdp->data + ETH_HLEN))
                return -EINVAL;
 
index a29508e..ef1a2eb 100644 (file)
@@ -1120,13 +1120,19 @@ static void sk_psock_strp_data_ready(struct sock *sk)
 
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
 {
+       int ret;
+
        static const struct strp_callbacks cb = {
                .rcv_msg        = sk_psock_strp_read,
                .read_sock_done = sk_psock_strp_read_done,
                .parse_msg      = sk_psock_strp_parse,
        };
 
-       return strp_init(&psock->strp, sk, &cb);
+       ret = strp_init(&psock->strp, sk, &cb);
+       if (!ret)
+               sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
+
+       return ret;
 }
 
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
@@ -1154,7 +1160,7 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 static void sk_psock_done_strp(struct sk_psock *psock)
 {
        /* Parser has been stopped */
-       if (psock->progs.stream_parser)
+       if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
                strp_done(&psock->strp);
 }
 #else
index 6d4f28e..732fc37 100644 (file)
@@ -1778,7 +1778,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
                spin_unlock(&sk->sk_peer_lock);
 
                if (!peer_pid)
-                       return -ESRCH;
+                       return -ENODATA;
 
                pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
                put_pid(peer_pid);
index 08ab108..8f07fea 100644 (file)
@@ -146,13 +146,13 @@ static void sock_map_del_link(struct sock *sk,
        list_for_each_entry_safe(link, tmp, &psock->link, list) {
                if (link->link_raw == link_raw) {
                        struct bpf_map *map = link->map;
-                       struct bpf_stab *stab = container_of(map, struct bpf_stab,
-                                                            map);
-                       if (psock->saved_data_ready && stab->progs.stream_parser)
+                       struct sk_psock_progs *progs = sock_map_progs(map);
+
+                       if (psock->saved_data_ready && progs->stream_parser)
                                strp_stop = true;
-                       if (psock->saved_data_ready && stab->progs.stream_verdict)
+                       if (psock->saved_data_ready && progs->stream_verdict)
                                verdict_stop = true;
-                       if (psock->saved_data_ready && stab->progs.skb_verdict)
+                       if (psock->saved_data_ready && progs->skb_verdict)
                                verdict_stop = true;
                        list_del(&link->list);
                        sk_psock_free_link(link);
index b8a2473..fd2eb14 100644 (file)
@@ -187,7 +187,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 
        /* And store cached results */
        icsk->icsk_pmtu_cookie = pmtu;
-       dp->dccps_mss_cache = cur_mps;
+       WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
 
        return cur_mps;
 }
index f331e59..4e3266e 100644 (file)
@@ -630,7 +630,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
-               val = dp->dccps_mss_cache;
+               val = READ_ONCE(dp->dccps_mss_cache);
                break;
        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
@@ -739,7 +739,7 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        trace_dccp_probe(sk, len);
 
-       if (len > dp->dccps_mss_cache)
+       if (len > READ_ONCE(dp->dccps_mss_cache))
                return -EMSGSIZE;
 
        lock_sock(sk);
@@ -772,6 +772,12 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                goto out_discard;
        }
 
+       /* We need to check dccps_mss_cache after socket is locked. */
+       if (len > dp->dccps_mss_cache) {
+               rc = -EMSGSIZE;
+               goto out_discard;
+       }
+
        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_from_msg(skb_put(skb, len), msg, len);
        if (rc != 0)
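
The dccps_mss_cache changes above follow the usual pattern for a field written under the socket lock but read locklessly: the writer uses WRITE_ONCE(), lockless readers use READ_ONCE(), and the sendmsg() path re-checks the value once it holds the lock. The fragment below models that pattern in plain C; the WRITE_ONCE/READ_ONCE definitions are simplified userspace stand-ins, not the kernel macros.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's READ_ONCE/WRITE_ONCE. */
#define WRITE_ONCE(x, val)	(*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))

static uint32_t mss_cache = 1400;	/* models dp->dccps_mss_cache */

/* Writer side (dccp_sync_mss) publishes the new value with WRITE_ONCE. */
static void sync_mss(uint32_t new_mss)
{
	WRITE_ONCE(mss_cache, new_mss);
}

/* Lockless reader (getsockopt, early sendmsg length check) uses READ_ONCE;
 * the real sendmsg path re-checks the length after lock_sock().
 */
static int sendmsg_len_ok(size_t len)
{
	return len <= READ_ONCE(mss_cache);
}

int main(void)
{
	sync_mss(1200);
	printf("%d\n", sendmsg_len_ok(1300));	/* 0: larger than the cached MSS */
	printf("%d\n", sendmsg_len_ok(1000));	/* 1: fits */
	return 0;
}
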
index 92c02c8..586b1b3 100644 (file)
@@ -224,7 +224,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
                .un.frag.__unused       = 0,
                .un.frag.mtu            = htons(mtu),
        };
-       icmph->checksum = ip_compute_csum(icmph, len);
+       icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0));
        skb_reset_transport_header(skb);
 
        niph = skb_push(skb, sizeof(*niph));
index f95142e..be5498f 100644 (file)
@@ -3221,13 +3221,9 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
                                     &rtm_dump_nexthop_cb, &filter);
        if (err < 0) {
                if (likely(skb->len))
-                       goto out;
-               goto out_err;
+                       err = skb->len;
        }
 
-out:
-       err = skb->len;
-out_err:
        cb->seq = net->nexthop.seq;
        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
        return err;
@@ -3367,25 +3363,19 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
                    dd->filter.res_bucket_nh_id != nhge->nh->id)
                        continue;
 
+               dd->ctx->bucket_index = bucket_index;
                err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
                                         RTM_NEWNEXTHOPBUCKET, portid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         cb->extack);
-               if (err < 0) {
-                       if (likely(skb->len))
-                               goto out;
-                       goto out_err;
-               }
+               if (err)
+                       return err;
        }
 
        dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
-       bucket_index = 0;
+       dd->ctx->bucket_index = 0;
 
-out:
-       err = skb->len;
-out_err:
-       dd->ctx->bucket_index = bucket_index;
-       return err;
+       return 0;
 }
 
 static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
@@ -3434,13 +3424,9 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
 
        if (err < 0) {
                if (likely(skb->len))
-                       goto out;
-               goto out_err;
+                       err = skb->len;
        }
 
-out:
-       err = skb->len;
-out_err:
        cb->seq = net->nexthop.seq;
        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
        return err;
index 18634eb..a42be96 100644 (file)
@@ -197,7 +197,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
 static inline int ndisc_is_useropt(const struct net_device *dev,
                                   struct nd_opt_hdr *opt)
 {
-       return opt->nd_opt_type == ND_OPT_RDNSS ||
+       return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
+               opt->nd_opt_type == ND_OPT_RDNSS ||
                opt->nd_opt_type == ND_OPT_DNSSL ||
                opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
                opt->nd_opt_type == ND_OPT_PREF64 ||
index 3317d1c..d806585 100644 (file)
@@ -2335,7 +2335,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 
        lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
 
-       if (flags & MPTCP_CF_FASTCLOSE) {
+       if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
                /* be sure to force the tcp_disconnect() path,
                 * to generate the egress reset
                 */
@@ -3328,7 +3328,7 @@ static void mptcp_release_cb(struct sock *sk)
 
        if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
                __mptcp_clean_una_wakeup(sk);
-       if (unlikely(&msk->cb_flags)) {
+       if (unlikely(msk->cb_flags)) {
                /* be sure to set the current sk state before tacking actions
                 * depending on sk_state, that is processing MPTCP_ERROR_REPORT
                 */
index 37fbe22..ba2a873 100644 (file)
@@ -325,7 +325,6 @@ struct mptcp_sock {
        u32             subflow_id;
        u32             setsockopt_seq;
        char            ca_name[TCP_CA_NAME_MAX];
-       struct mptcp_sock       *dl_next;
 };
 
 #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
index 9ee3b7a..94ae7dd 100644 (file)
@@ -1793,16 +1793,31 @@ static void subflow_state_change(struct sock *sk)
 void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
 {
        struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
-       struct mptcp_sock *msk, *next, *head = NULL;
-       struct request_sock *req;
-       struct sock *sk;
+       struct request_sock *req, *head, *tail;
+       struct mptcp_subflow_context *subflow;
+       struct sock *sk, *ssk;
 
-       /* build a list of all unaccepted mptcp sockets */
+       /* Due to lock dependencies, no relevant lock can be acquired under rskq_lock.
+        * Splice the req list, so that accept() cannot reach the pending ssk after
+        * the listener socket is released below.
+        */
        spin_lock_bh(&queue->rskq_lock);
-       for (req = queue->rskq_accept_head; req; req = req->dl_next) {
-               struct mptcp_subflow_context *subflow;
-               struct sock *ssk = req->sk;
+       head = queue->rskq_accept_head;
+       tail = queue->rskq_accept_tail;
+       queue->rskq_accept_head = NULL;
+       queue->rskq_accept_tail = NULL;
+       spin_unlock_bh(&queue->rskq_lock);
+
+       if (!head)
+               return;
 
+       /* can't acquire the msk socket lock under the subflow one,
+        * or will cause ABBA deadlock
+        */
+       release_sock(listener_ssk);
+
+       for (req = head; req; req = req->dl_next) {
+               ssk = req->sk;
                if (!sk_is_mptcp(ssk))
                        continue;
 
@@ -1810,32 +1825,10 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
                if (!subflow || !subflow->conn)
                        continue;
 
-               /* skip if already in list */
                sk = subflow->conn;
-               msk = mptcp_sk(sk);
-               if (msk->dl_next || msk == head)
-                       continue;
-
                sock_hold(sk);
-               msk->dl_next = head;
-               head = msk;
-       }
-       spin_unlock_bh(&queue->rskq_lock);
-       if (!head)
-               return;
-
-       /* can't acquire the msk socket lock under the subflow one,
-        * or will cause ABBA deadlock
-        */
-       release_sock(listener_ssk);
-
-       for (msk = head; msk; msk = next) {
-               sk = (struct sock *)msk;
 
                lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
-               next = msk->dl_next;
-               msk->dl_next = NULL;
-
                __mptcp_unaccepted_force_close(sk);
                release_sock(sk);
 
@@ -1859,6 +1852,13 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
 
        /* we are still under the listener msk socket lock */
        lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+
+       /* restore the listener queue, to let the TCP code clean it up */
+       spin_lock_bh(&queue->rskq_lock);
+       WARN_ON_ONCE(queue->rskq_accept_head);
+       queue->rskq_accept_head = head;
+       queue->rskq_accept_tail = tail;
+       spin_unlock_bh(&queue->rskq_lock);
 }
 
 static int subflow_ulp_init(struct sock *sk)
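
The rework above follows a splice-and-restore pattern: detach the whole pending request list while holding rskq_lock, drop the lock so the msk sockets can be locked safely, walk the detached list, and finally splice it back so the normal TCP teardown still finds it. A minimal userspace model of that pattern is shown below; the types and the lock are hypothetical stand-ins, not the kernel's request_sock machinery.

#include <stdio.h>

struct req {
	struct req *dl_next;
	int id;
};

struct queue {
	struct req *head, *tail;	/* protected by rskq_lock in the kernel */
};

int main(void)
{
	struct req r2 = { NULL, 2 }, r1 = { &r2, 1 };
	struct queue q = { &r1, &r2 };

	/* 1) Under the lock: splice the list out, leaving the queue empty. */
	struct req *head = q.head, *tail = q.tail;
	q.head = q.tail = NULL;

	/* 2) Without the lock: walk the detached list. */
	for (struct req *req = head; req; req = req->dl_next)
		printf("processing req %d\n", req->id);

	/* 3) Under the lock again: restore the queue for normal cleanup. */
	q.head = head;
	q.tail = tail;
	return 0;
}
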
index d3c6ecd..c62227a 100644 (file)
@@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions);
 static LIST_HEAD(nf_tables_objects);
 static LIST_HEAD(nf_tables_flowtables);
 static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
 static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
 
 enum {
        NFT_VALIDATE_SKIP       = 0,
@@ -120,6 +122,9 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
 static void nf_tables_trans_destroy_work(struct work_struct *w);
 static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
 
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
 static void nft_ctx_init(struct nft_ctx *ctx,
                         struct net *net,
                         const struct sk_buff *skb,
@@ -582,10 +587,6 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
        return __nft_trans_set_add(ctx, msg_type, set, NULL);
 }
 
-static void nft_setelem_data_deactivate(const struct net *net,
-                                       const struct nft_set *set,
-                                       struct nft_set_elem *elem);
-
 static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
                                  struct nft_set *set,
                                  const struct nft_set_iter *iter,
@@ -5055,6 +5056,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 
        INIT_LIST_HEAD(&set->bindings);
        INIT_LIST_HEAD(&set->catchall_list);
+       refcount_set(&set->refs, 1);
        set->table = table;
        write_pnet(&set->net, net);
        set->ops = ops;
@@ -5122,6 +5124,14 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
        }
 }
 
+static void nft_set_put(struct nft_set *set)
+{
+       if (refcount_dec_and_test(&set->refs)) {
+               kfree(set->name);
+               kvfree(set);
+       }
+}
+
 static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 {
        int i;
@@ -5134,8 +5144,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 
        set->ops->destroy(ctx, set);
        nft_set_catchall_destroy(ctx, set);
-       kfree(set->name);
-       kvfree(set);
+       nft_set_put(set);
 }
 
 static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
@@ -5602,8 +5611,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
                                  const struct nft_set_iter *iter,
                                  struct nft_set_elem *elem)
 {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
        struct nft_set_dump_args *args;
 
+       if (nft_set_elem_expired(ext))
+               return 0;
+
        args = container_of(iter, struct nft_set_dump_args, iter);
        return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
 }
@@ -6274,7 +6287,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
        list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
                if (nft_set_elem_active(ext, genmask) &&
-                   !nft_set_elem_expired(ext))
+                   !nft_set_elem_expired(ext) &&
+                   !nft_set_elem_is_dead(ext))
                        return ext;
        }
 
@@ -6282,29 +6296,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
 }
 EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
 
-void *nft_set_catchall_gc(const struct nft_set *set)
-{
-       struct nft_set_elem_catchall *catchall, *next;
-       struct nft_set_ext *ext;
-       void *elem = NULL;
-
-       list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
-               ext = nft_set_elem_ext(set, catchall->elem);
-
-               if (!nft_set_elem_expired(ext) ||
-                   nft_set_elem_mark_busy(ext))
-                       continue;
-
-               elem = catchall->elem;
-               list_del_rcu(&catchall->list);
-               kfree_rcu(catchall, rcu);
-               break;
-       }
-
-       return elem;
-}
-EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
-
 static int nft_setelem_catchall_insert(const struct net *net,
                                       struct nft_set *set,
                                       const struct nft_set_elem *elem,
@@ -6366,7 +6357,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
 
        if (nft_setelem_is_catchall(set, elem)) {
                nft_set_elem_change_active(net, set, ext);
-               nft_set_elem_clear_busy(ext);
        } else {
                set->ops->activate(net, set, elem);
        }
@@ -6381,8 +6371,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
 
        list_for_each_entry(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
-               if (!nft_is_active(net, ext) ||
-                   nft_set_elem_mark_busy(ext))
+               if (!nft_is_active(net, ext))
                        continue;
 
                kfree(elem->priv);
@@ -6777,7 +6766,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                goto err_elem_free;
        }
 
-       ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
+       ext->genmask = nft_genmask_cur(ctx->net);
 
        err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
        if (err) {
@@ -6929,9 +6918,9 @@ static void nft_setelem_data_activate(const struct net *net,
                nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
 }
 
-static void nft_setelem_data_deactivate(const struct net *net,
-                                       const struct nft_set *set,
-                                       struct nft_set_elem *elem)
+void nft_setelem_data_deactivate(const struct net *net,
+                                const struct nft_set *set,
+                                struct nft_set_elem *elem)
 {
        const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
 
@@ -7095,8 +7084,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
 
        list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
-               if (!nft_set_elem_active(ext, genmask) ||
-                   nft_set_elem_mark_busy(ext))
+               if (!nft_set_elem_active(ext, genmask))
                        continue;
 
                elem.priv = catchall->elem;
@@ -7170,29 +7158,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
        return err;
 }
 
-void nft_set_gc_batch_release(struct rcu_head *rcu)
-{
-       struct nft_set_gc_batch *gcb;
-       unsigned int i;
-
-       gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
-       for (i = 0; i < gcb->head.cnt; i++)
-               nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
-       kfree(gcb);
-}
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
-                                               gfp_t gfp)
-{
-       struct nft_set_gc_batch *gcb;
-
-       gcb = kzalloc(sizeof(*gcb), gfp);
-       if (gcb == NULL)
-               return gcb;
-       gcb->head.set = set;
-       return gcb;
-}
-
 /*
  * Stateful objects
  */
@@ -9414,6 +9379,207 @@ void nft_chain_del(struct nft_chain *chain)
        list_del_rcu(&chain->list);
 }
 
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+                                       struct nft_trans_gc *trans)
+{
+       void **priv = trans->priv;
+       unsigned int i;
+
+       for (i = 0; i < trans->count; i++) {
+               struct nft_set_elem elem = {
+                       .priv = priv[i],
+               };
+
+               nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
+               nft_setelem_remove(ctx->net, trans->set, &elem);
+       }
+}
+
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+       nft_set_put(trans->set);
+       put_net(trans->net);
+       kfree(trans);
+}
+
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+       struct nft_set_elem elem = {};
+       struct nft_trans_gc *trans;
+       struct nft_ctx ctx = {};
+       unsigned int i;
+
+       trans = container_of(rcu, struct nft_trans_gc, rcu);
+       ctx.net = read_pnet(&trans->set->net);
+
+       for (i = 0; i < trans->count; i++) {
+               elem.priv = trans->priv[i];
+               if (!nft_setelem_is_catchall(trans->set, &elem))
+                       atomic_dec(&trans->set->nelems);
+
+               nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+       }
+
+       nft_trans_gc_destroy(trans);
+}
+
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+       struct nftables_pernet *nft_net;
+       struct nft_ctx ctx = {};
+
+       nft_net = nft_pernet(trans->net);
+
+       mutex_lock(&nft_net->commit_mutex);
+
+       /* Check for a race with the transaction path, otherwise this batch
+        * refers to stale objects that might not be there anymore. Also skip
+        * the batch if the set has been destroyed by a control plane
+        * transaction, in case the gc worker loses that race.
+        */
+       if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
+               mutex_unlock(&nft_net->commit_mutex);
+               return false;
+       }
+
+       ctx.net = trans->net;
+       ctx.table = trans->set->table;
+
+       nft_trans_gc_setelem_remove(&ctx, trans);
+       mutex_unlock(&nft_net->commit_mutex);
+
+       return true;
+}
+
+static void nft_trans_gc_work(struct work_struct *work)
+{
+       struct nft_trans_gc *trans, *next;
+       LIST_HEAD(trans_gc_list);
+
+       spin_lock(&nf_tables_destroy_list_lock);
+       list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+       spin_unlock(&nf_tables_destroy_list_lock);
+
+       list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+               list_del(&trans->list);
+               if (!nft_trans_gc_work_done(trans)) {
+                       nft_trans_gc_destroy(trans);
+                       continue;
+               }
+               call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+       }
+}
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+                                       unsigned int gc_seq, gfp_t gfp)
+{
+       struct net *net = read_pnet(&set->net);
+       struct nft_trans_gc *trans;
+
+       trans = kzalloc(sizeof(*trans), gfp);
+       if (!trans)
+               return NULL;
+
+       refcount_inc(&set->refs);
+       trans->set = set;
+       trans->net = get_net(net);
+       trans->seq = gc_seq;
+
+       return trans;
+}
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+       trans->priv[trans->count++] = priv;
+}
+
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+       spin_lock(&nf_tables_gc_list_lock);
+       list_add_tail(&trans->list, &nf_tables_gc_list);
+       spin_unlock(&nf_tables_gc_list_lock);
+
+       schedule_work(&trans_gc_work);
+}
+
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+       return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+                                             unsigned int gc_seq, gfp_t gfp)
+{
+       if (nft_trans_gc_space(gc))
+               return gc;
+
+       nft_trans_gc_queue_work(gc);
+
+       return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+}
+
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+       if (trans->count == 0) {
+               nft_trans_gc_destroy(trans);
+               return;
+       }
+
+       nft_trans_gc_queue_work(trans);
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+       if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+               return NULL;
+
+       if (nft_trans_gc_space(gc))
+               return gc;
+
+       call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+       return nft_trans_gc_alloc(gc->set, 0, gfp);
+}
+
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+       WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+       if (trans->count == 0) {
+               nft_trans_gc_destroy(trans);
+               return;
+       }
+
+       call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+                                          unsigned int gc_seq)
+{
+       struct nft_set_elem_catchall *catchall;
+       const struct nft_set *set = gc->set;
+       struct nft_set_ext *ext;
+
+       list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+               ext = nft_set_elem_ext(set, catchall->elem);
+
+               if (!nft_set_elem_expired(ext))
+                       continue;
+               if (nft_set_elem_is_dead(ext))
+                       goto dead_elem;
+
+               nft_set_elem_dead(ext);
+dead_elem:
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       return NULL;
+
+               nft_trans_gc_elem_add(gc, catchall->elem);
+       }
+
+       return gc;
+}
+
 static void nf_tables_module_autoload_cleanup(struct net *net)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9576,11 +9742,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
        struct nft_trans *trans, *next;
+       unsigned int base_seq, gc_seq;
        LIST_HEAD(set_update_list);
        struct nft_trans_elem *te;
        struct nft_chain *chain;
        struct nft_table *table;
-       unsigned int base_seq;
        LIST_HEAD(adl);
        int err;
 
@@ -9657,6 +9823,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
        WRITE_ONCE(nft_net->base_seq, base_seq);
 
+       /* Bump the gc counter; an odd value is the busy mark. */
+       gc_seq = READ_ONCE(nft_net->gc_seq);
+       WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
        /* step 3. Start new generation, rules_gen_X now in use. */
        net->nft.gencursor = nft_gencursor_next(net);
 
@@ -9764,6 +9934,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                        break;
                case NFT_MSG_DELSET:
                case NFT_MSG_DESTROYSET:
+                       nft_trans_set(trans)->dead = 1;
                        list_del_rcu(&nft_trans_set(trans)->list);
                        nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
                                             trans->msg_type, GFP_KERNEL);
@@ -9866,6 +10037,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
        nft_commit_notify(net, NETLINK_CB(skb).portid);
        nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
        nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+       WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
        nf_tables_commit_release(net);
 
        return 0;
@@ -10915,6 +11088,7 @@ static int __net_init nf_tables_init_net(struct net *net)
        INIT_LIST_HEAD(&nft_net->notify_list);
        mutex_init(&nft_net->commit_mutex);
        nft_net->base_seq = 1;
+       nft_net->gc_seq = 0;
 
        return 0;
 }
@@ -10943,10 +11117,16 @@ static void __net_exit nf_tables_exit_net(struct net *net)
        WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
 }
 
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+       flush_work(&trans_gc_work);
+}
+
 static struct pernet_operations nf_tables_net_ops = {
        .init           = nf_tables_init_net,
        .pre_exit       = nf_tables_pre_exit_net,
        .exit           = nf_tables_exit_net,
+       .exit_batch     = nf_tables_exit_batch,
        .id             = &nf_tables_net_id,
        .size           = sizeof(struct nftables_pernet),
 };
@@ -11018,6 +11198,7 @@ static void __exit nf_tables_module_exit(void)
        nft_chain_filter_fini();
        nft_chain_route_fini();
        unregister_pernet_subsys(&nf_tables_net_ops);
+       cancel_work_sync(&trans_gc_work);
        cancel_work_sync(&trans_destroy_work);
        rcu_barrier();
        rhltable_destroy(&nft_objname_ht);
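
The gc_seq handling added to the commit path behaves like a sequence counter: the counter is bumped to an odd value while a new generation is being installed and bumped back to an even value when the commit is done, and the asynchronous GC worker samples it when it starts collecting and drops its batch if the value changed underneath it. The small userspace model below captures that ordering only; it is not kernel code and ignores locking and memory ordering details.

#include <stdbool.h>
#include <stdio.h>

static unsigned int gc_seq;	/* models nftables_pernet::gc_seq */

/* Commit path: odd value marks "ruleset update in progress". */
static void commit_begin(void) { gc_seq++; }	/* becomes odd: busy */
static void commit_end(void)   { gc_seq++; }	/* back to even: idle */

/* GC worker: remember the sequence seen while collecting the batch... */
static unsigned int gc_collect(void) { return gc_seq; }

/* ...and only apply the batch if nothing changed in between. */
static bool gc_apply(unsigned int batch_seq)
{
	if (gc_seq != batch_seq) {
		printf("stale batch (seq %u -> %u), dropped\n", batch_seq, gc_seq);
		return false;
	}
	printf("batch applied at seq %u\n", batch_seq);
	return true;
}

int main(void)
{
	unsigned int seq = gc_collect();	/* worker starts collecting */
	commit_begin();				/* a transaction races in */
	commit_end();
	gc_apply(seq);				/* stale batch is thrown away */

	seq = gc_collect();
	gc_apply(seq);				/* no race: batch goes through */
	return 0;
}
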
index 0b73cb0..cef5df8 100644 (file)
@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
 
        if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
                return 1;
+       if (nft_set_elem_is_dead(&he->ext))
+               return 1;
        if (nft_set_elem_expired(&he->ext))
                return 1;
        if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -188,7 +190,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
        struct nft_rhash_elem *he = elem->priv;
 
        nft_set_elem_change_active(net, set, &he->ext);
-       nft_set_elem_clear_busy(&he->ext);
 }
 
 static bool nft_rhash_flush(const struct net *net,
@@ -196,12 +197,9 @@ static bool nft_rhash_flush(const struct net *net,
 {
        struct nft_rhash_elem *he = priv;
 
-       if (!nft_set_elem_mark_busy(&he->ext) ||
-           !nft_is_active(net, &he->ext)) {
-               nft_set_elem_change_active(net, set, &he->ext);
-               return true;
-       }
-       return false;
+       nft_set_elem_change_active(net, set, &he->ext);
+
+       return true;
 }
 
 static void *nft_rhash_deactivate(const struct net *net,
@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
 
        rcu_read_lock();
        he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
-       if (he != NULL &&
-           !nft_rhash_flush(net, set, he))
-               he = NULL;
+       if (he)
+               nft_set_elem_change_active(net, set, &he->ext);
 
        rcu_read_unlock();
 
@@ -252,7 +249,9 @@ static bool nft_rhash_delete(const struct nft_set *set,
        if (he == NULL)
                return false;
 
-       return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+       nft_set_elem_dead(&he->ext);
+
+       return true;
 }
 
 static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
@@ -278,8 +277,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (nft_set_elem_expired(&he->ext))
-                       goto cont;
                if (!nft_set_elem_active(&he->ext, iter->genmask))
                        goto cont;
 
@@ -314,25 +311,48 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
 
 static void nft_rhash_gc(struct work_struct *work)
 {
+       struct nftables_pernet *nft_net;
        struct nft_set *set;
        struct nft_rhash_elem *he;
        struct nft_rhash *priv;
-       struct nft_set_gc_batch *gcb = NULL;
        struct rhashtable_iter hti;
+       struct nft_trans_gc *gc;
+       struct net *net;
+       u32 gc_seq;
 
        priv = container_of(work, struct nft_rhash, gc_work.work);
        set  = nft_set_container_of(priv);
+       net  = read_pnet(&set->net);
+       nft_net = nft_pernet(net);
+       gc_seq = READ_ONCE(nft_net->gc_seq);
+
+       gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+       if (!gc)
+               goto done;
 
        rhashtable_walk_enter(&priv->ht, &hti);
        rhashtable_walk_start(&hti);
 
        while ((he = rhashtable_walk_next(&hti))) {
                if (IS_ERR(he)) {
-                       if (PTR_ERR(he) != -EAGAIN)
-                               break;
+                       if (PTR_ERR(he) != -EAGAIN) {
+                               nft_trans_gc_destroy(gc);
+                               gc = NULL;
+                               goto try_later;
+                       }
                        continue;
                }
 
+               /* Ruleset has been updated, try later. */
+               if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+                       nft_trans_gc_destroy(gc);
+                       gc = NULL;
+                       goto try_later;
+               }
+
+               if (nft_set_elem_is_dead(&he->ext))
+                       goto dead_elem;
+
                if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
                    nft_rhash_expr_needs_gc_run(set, &he->ext))
                        goto needs_gc_run;
@@ -340,26 +360,26 @@ static void nft_rhash_gc(struct work_struct *work)
                if (!nft_set_elem_expired(&he->ext))
                        continue;
 needs_gc_run:
-               if (nft_set_elem_mark_busy(&he->ext))
-                       continue;
+               nft_set_elem_dead(&he->ext);
+dead_elem:
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       goto try_later;
 
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb == NULL)
-                       break;
-               rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
-               atomic_dec(&set->nelems);
-               nft_set_gc_batch_add(gcb, he);
+               nft_trans_gc_elem_add(gc, he);
        }
+
+       gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
+       /* catchall list iteration requires rcu read side lock. */
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);
 
-       he = nft_set_catchall_gc(set);
-       if (he) {
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb)
-                       nft_set_gc_batch_add(gcb, he);
-       }
-       nft_set_gc_batch_complete(gcb);
+       if (gc)
+               nft_trans_gc_queue_async_done(gc);
+
+done:
        queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
                           nft_set_gc_interval(set));
 }
@@ -394,7 +414,7 @@ static int nft_rhash_init(const struct nft_set *set,
                return err;
 
        INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
-       if (set->flags & NFT_SET_TIMEOUT)
+       if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
                nft_rhash_gc_init(set);
 
        return 0;
@@ -422,7 +442,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
        };
 
        cancel_delayed_work_sync(&priv->gc_work);
-       rcu_barrier();
        rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
                                    (void *)&rhash_ctx);
 }
index 49915a2..a5b8301 100644 (file)
@@ -566,8 +566,7 @@ next_match:
                        goto out;
 
                if (last) {
-                       if (nft_set_elem_expired(&f->mt[b].e->ext) ||
-                           (genmask &&
+                       if ((genmask &&
                             !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
                                goto next_match;
 
@@ -601,8 +600,17 @@ out:
 static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
                            const struct nft_set_elem *elem, unsigned int flags)
 {
-       return pipapo_get(net, set, (const u8 *)elem->key.val.data,
-                         nft_genmask_cur(net));
+       struct nft_pipapo_elem *ret;
+
+       ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
+                        nft_genmask_cur(net));
+       if (IS_ERR(ret))
+               return ret;
+
+       if (nft_set_elem_expired(&ret->ext))
+               return ERR_PTR(-ENOENT);
+
+       return ret;
 }
 
 /**
@@ -1528,16 +1536,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
        }
 }
 
+static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
+                                    struct nft_pipapo_elem *e)
+
+{
+       struct nft_set_elem elem = {
+               .priv   = e,
+       };
+
+       nft_setelem_data_deactivate(net, set, &elem);
+}
+
 /**
  * pipapo_gc() - Drop expired entries from set, destroy start and end elements
  * @set:       nftables API set representation
  * @m:         Matching data
  */
-static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
 {
+       struct nft_set *set = (struct nft_set *) _set;
        struct nft_pipapo *priv = nft_set_priv(set);
+       struct net *net = read_pnet(&set->net);
        int rules_f0, first_rule = 0;
        struct nft_pipapo_elem *e;
+       struct nft_trans_gc *gc;
+
+       gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+       if (!gc)
+               return;
 
        while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
                union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1561,13 +1587,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
                f--;
                i--;
                e = f->mt[rulemap[i].to].e;
-               if (nft_set_elem_expired(&e->ext) &&
-                   !nft_set_elem_mark_busy(&e->ext)) {
+
+               /* Synchronous gc never fails, so there is no need to set the
+                * NFT_SET_ELEM_DEAD_BIT.
+                */
+               if (nft_set_elem_expired(&e->ext)) {
                        priv->dirty = true;
-                       pipapo_drop(m, rulemap);
 
-                       rcu_barrier();
-                       nft_set_elem_destroy(set, e, true);
+                       gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+                       if (!gc)
+                               break;
+
+                       nft_pipapo_gc_deactivate(net, set, e);
+                       pipapo_drop(m, rulemap);
+                       nft_trans_gc_elem_add(gc, e);
 
                        /* And check again current first rule, which is now the
                         * first we haven't checked.
@@ -1577,11 +1610,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
                }
        }
 
-       e = nft_set_catchall_gc(set);
-       if (e)
-               nft_set_elem_destroy(set, e, true);
-
-       priv->last_gc = jiffies;
+       gc = nft_trans_gc_catchall(gc, 0);
+       if (gc) {
+               nft_trans_gc_queue_sync_done(gc);
+               priv->last_gc = jiffies;
+       }
 }
 
 /**
@@ -1706,7 +1739,6 @@ static void nft_pipapo_activate(const struct net *net,
                return;
 
        nft_set_elem_change_active(net, set, &e->ext);
-       nft_set_elem_clear_busy(&e->ext);
 }
 
 /**
@@ -2005,8 +2037,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
                        goto cont;
 
                e = f->mt[r].e;
-               if (nft_set_elem_expired(&e->ext))
-                       goto cont;
 
                elem.priv = e;
 
index 8d73fff..f9d4c8f 100644 (file)
@@ -46,6 +46,12 @@ static int nft_rbtree_cmp(const struct nft_set *set,
                      set->klen);
 }
 
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+       return nft_set_elem_expired(&rbe->ext) ||
+              nft_set_elem_is_dead(&rbe->ext);
+}
+
 static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
                                const u32 *key, const struct nft_set_ext **ext,
                                unsigned int seq)
@@ -80,7 +86,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
                                continue;
                        }
 
-                       if (nft_set_elem_expired(&rbe->ext))
+                       if (nft_rbtree_elem_expired(rbe))
                                return false;
 
                        if (nft_rbtree_interval_end(rbe)) {
@@ -98,7 +104,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
 
        if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
            nft_set_elem_active(&interval->ext, genmask) &&
-           !nft_set_elem_expired(&interval->ext) &&
+           !nft_rbtree_elem_expired(interval) &&
            nft_rbtree_interval_start(interval)) {
                *ext = &interval->ext;
                return true;
@@ -215,6 +221,18 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
        return rbe;
 }
 
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+                                struct nft_rbtree *priv,
+                                struct nft_rbtree_elem *rbe)
+{
+       struct nft_set_elem elem = {
+               .priv   = rbe,
+       };
+
+       nft_setelem_data_deactivate(net, set, &elem);
+       rb_erase(&rbe->node, &priv->root);
+}
+
 static int nft_rbtree_gc_elem(const struct nft_set *__set,
                              struct nft_rbtree *priv,
                              struct nft_rbtree_elem *rbe,
@@ -222,11 +240,12 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
 {
        struct nft_set *set = (struct nft_set *)__set;
        struct rb_node *prev = rb_prev(&rbe->node);
+       struct net *net = read_pnet(&set->net);
        struct nft_rbtree_elem *rbe_prev;
-       struct nft_set_gc_batch *gcb;
+       struct nft_trans_gc *gc;
 
-       gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
-       if (!gcb)
+       gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+       if (!gc)
                return -ENOMEM;
 
        /* search for end interval coming before this element.
@@ -244,17 +263,28 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
 
        if (prev) {
                rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+               nft_rbtree_gc_remove(net, set, priv, rbe_prev);
 
-               rb_erase(&rbe_prev->node, &priv->root);
-               atomic_dec(&set->nelems);
-               nft_set_gc_batch_add(gcb, rbe_prev);
+               /* There is always room in this trans gc for this element, so
+                * memory allocation never actually happens and the warning
+                * splat flags a bug. No need to set NFT_SET_ELEM_DEAD_BIT,
+                * this is synchronous gc which never fails.
+                */
+               gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+               if (WARN_ON_ONCE(!gc))
+                       return -ENOMEM;
+
+               nft_trans_gc_elem_add(gc, rbe_prev);
        }
 
-       rb_erase(&rbe->node, &priv->root);
-       atomic_dec(&set->nelems);
+       nft_rbtree_gc_remove(net, set, priv, rbe);
+       gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+       if (WARN_ON_ONCE(!gc))
+               return -ENOMEM;
+
+       nft_trans_gc_elem_add(gc, rbe);
 
-       nft_set_gc_batch_add(gcb, rbe);
-       nft_set_gc_batch_complete(gcb);
+       nft_trans_gc_queue_sync_done(gc);
 
        return 0;
 }
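
The WARN_ON_ONCE() above leans on the guarantee spelled out in the comment: the batch allocated at the top of the function already has room for the element(s) this path can queue, so nft_trans_gc_queue_sync() is not expected to allocate or fail here. A rough userspace sketch of that reserve-up-front idea, with illustrative names (sync_gc, GC_BATCH_ROOM) rather than kernel API:

    #include <assert.h>
    #include <stddef.h>
    #include <stdlib.h>

    #define GC_BATCH_ROOM 2         /* e.g. an interval's start and end element */

    struct sync_gc {
            size_t used;
            void *elems[GC_BATCH_ROOM];
    };

    /* the only step that can fail happens before anything is queued */
    static struct sync_gc *sync_gc_alloc(void)
    {
            return calloc(1, sizeof(struct sync_gc));
    }

    /* queueing never allocates; exceeding the reserved room is a bug */
    static void sync_gc_queue(struct sync_gc *gc, void *elem)
    {
            assert(gc->used < GC_BATCH_ROOM);       /* mirrors the WARN_ON_ONCE() */
            gc->elems[gc->used++] = elem;
    }
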
@@ -482,7 +512,6 @@ static void nft_rbtree_activate(const struct net *net,
        struct nft_rbtree_elem *rbe = elem->priv;
 
        nft_set_elem_change_active(net, set, &rbe->ext);
-       nft_set_elem_clear_busy(&rbe->ext);
 }
 
 static bool nft_rbtree_flush(const struct net *net,
@@ -490,12 +519,9 @@ static bool nft_rbtree_flush(const struct net *net,
 {
        struct nft_rbtree_elem *rbe = priv;
 
-       if (!nft_set_elem_mark_busy(&rbe->ext) ||
-           !nft_is_active(net, &rbe->ext)) {
-               nft_set_elem_change_active(net, set, &rbe->ext);
-               return true;
-       }
-       return false;
+       nft_set_elem_change_active(net, set, &rbe->ext);
+
+       return true;
 }
 
 static void *nft_rbtree_deactivate(const struct net *net,
@@ -552,8 +578,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (nft_set_elem_expired(&rbe->ext))
-                       goto cont;
                if (!nft_set_elem_active(&rbe->ext, iter->genmask))
                        goto cont;
 
@@ -572,26 +596,40 @@ cont:
 
 static void nft_rbtree_gc(struct work_struct *work)
 {
-       struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
-       struct nft_set_gc_batch *gcb = NULL;
+       struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+       struct nftables_pernet *nft_net;
        struct nft_rbtree *priv;
+       struct nft_trans_gc *gc;
        struct rb_node *node;
        struct nft_set *set;
+       unsigned int gc_seq;
        struct net *net;
-       u8 genmask;
 
        priv = container_of(work, struct nft_rbtree, gc_work.work);
        set  = nft_set_container_of(priv);
        net  = read_pnet(&set->net);
-       genmask = nft_genmask_cur(net);
+       nft_net = nft_pernet(net);
+       gc_seq  = READ_ONCE(nft_net->gc_seq);
+
+       gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+       if (!gc)
+               goto done;
 
        write_lock_bh(&priv->lock);
        write_seqcount_begin(&priv->count);
        for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+               /* Ruleset has been updated, try later. */
+               if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+                       nft_trans_gc_destroy(gc);
+                       gc = NULL;
+                       goto try_later;
+               }
+
                rbe = rb_entry(node, struct nft_rbtree_elem, node);
 
-               if (!nft_set_elem_active(&rbe->ext, genmask))
-                       continue;
+               if (nft_set_elem_is_dead(&rbe->ext))
+                       goto dead_elem;
 
                /* elements are reversed in the rbtree for historical reasons,
                 * from highest to lowest value, that is why end element is
@@ -604,46 +642,36 @@ static void nft_rbtree_gc(struct work_struct *work)
                if (!nft_set_elem_expired(&rbe->ext))
                        continue;
 
-               if (nft_set_elem_mark_busy(&rbe->ext)) {
-                       rbe_end = NULL;
+               nft_set_elem_dead(&rbe->ext);
+
+               if (!rbe_end)
                        continue;
-               }
 
-               if (rbe_prev) {
-                       rb_erase(&rbe_prev->node, &priv->root);
-                       rbe_prev = NULL;
-               }
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (!gcb)
-                       break;
+               nft_set_elem_dead(&rbe_end->ext);
 
-               atomic_dec(&set->nelems);
-               nft_set_gc_batch_add(gcb, rbe);
-               rbe_prev = rbe;
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       goto try_later;
 
-               if (rbe_end) {
-                       atomic_dec(&set->nelems);
-                       nft_set_gc_batch_add(gcb, rbe_end);
-                       rb_erase(&rbe_end->node, &priv->root);
-                       rbe_end = NULL;
-               }
-               node = rb_next(node);
-               if (!node)
-                       break;
+               nft_trans_gc_elem_add(gc, rbe_end);
+               rbe_end = NULL;
+dead_elem:
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       goto try_later;
+
+               nft_trans_gc_elem_add(gc, rbe);
        }
-       if (rbe_prev)
-               rb_erase(&rbe_prev->node, &priv->root);
+
+       gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
        write_seqcount_end(&priv->count);
        write_unlock_bh(&priv->lock);
 
-       rbe = nft_set_catchall_gc(set);
-       if (rbe) {
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb)
-                       nft_set_gc_batch_add(gcb, rbe);
-       }
-       nft_set_gc_batch_complete(gcb);
-
+       if (gc)
+               nft_trans_gc_queue_async_done(gc);
+done:
        queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
                           nft_set_gc_interval(set));
 }
index a4631cb..a2935bd 100644 (file)
@@ -401,18 +401,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 {
        union tpacket_uhdr h;
 
+       /* WRITE_ONCE() stores are paired with READ_ONCE() in __packet_get_status */
+
        h.raw = frame;
        switch (po->tp_version) {
        case TPACKET_V1:
-               h.h1->tp_status = status;
+               WRITE_ONCE(h.h1->tp_status, status);
                flush_dcache_page(pgv_to_page(&h.h1->tp_status));
                break;
        case TPACKET_V2:
-               h.h2->tp_status = status;
+               WRITE_ONCE(h.h2->tp_status, status);
                flush_dcache_page(pgv_to_page(&h.h2->tp_status));
                break;
        case TPACKET_V3:
-               h.h3->tp_status = status;
+               WRITE_ONCE(h.h3->tp_status, status);
                flush_dcache_page(pgv_to_page(&h.h3->tp_status));
                break;
        default:
@@ -429,17 +431,19 @@ static int __packet_get_status(const struct packet_sock *po, void *frame)
 
        smp_rmb();
 
+       /* READ_ONCE() loads are paired with WRITE_ONCE() in __packet_set_status */
+
        h.raw = frame;
        switch (po->tp_version) {
        case TPACKET_V1:
                flush_dcache_page(pgv_to_page(&h.h1->tp_status));
-               return h.h1->tp_status;
+               return READ_ONCE(h.h1->tp_status);
        case TPACKET_V2:
                flush_dcache_page(pgv_to_page(&h.h2->tp_status));
-               return h.h2->tp_status;
+               return READ_ONCE(h.h2->tp_status);
        case TPACKET_V3:
                flush_dcache_page(pgv_to_page(&h.h3->tp_status));
-               return h.h3->tp_status;
+               return READ_ONCE(h.h3->tp_status);
        default:
                WARN(1, "TPACKET version not supported.\n");
                BUG();
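
tp_status lives in a ring that user space maps and polls concurrently, so the accesses are now annotated to keep the compiler from tearing or fusing them; the existing barriers and flush_dcache_page() calls still handle ordering and cache maintenance. A rough userspace analogue of the WRITE_ONCE()/READ_ONCE() pairing using C11 relaxed atomics (ring_slot and the helpers below are illustrative, not part of the af_packet ABI):

    #include <stdatomic.h>
    #include <stdint.h>

    struct ring_slot {
            _Atomic uint32_t tp_status;     /* shared with the other side via mmap */
    };

    /* producer side: roughly what WRITE_ONCE(h.h1->tp_status, status) guarantees */
    static void slot_set_status(struct ring_slot *s, uint32_t status)
    {
            atomic_store_explicit(&s->tp_status, status, memory_order_relaxed);
    }

    /* consumer side: roughly what READ_ONCE(h.h1->tp_status) guarantees */
    static uint32_t slot_get_status(struct ring_slot *s)
    {
            return atomic_load_explicit(&s->tp_status, memory_order_relaxed);
    }
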
index 0c013d2..f5834af 100644 (file)
@@ -378,8 +378,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
        sk->sk_state = SMC_INIT;
        sk->sk_destruct = smc_destruct;
        sk->sk_protocol = protocol;
-       WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
-       WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
+       WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
+       WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
        smc = smc_sk(sk);
        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
        INIT_WORK(&smc->connect_work, smc_connect_work);
@@ -436,13 +436,60 @@ out:
        return rc;
 }
 
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
+
+#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
+                            (1UL << SOCK_KEEPOPEN) | \
+                            (1UL << SOCK_LINGER) | \
+                            (1UL << SOCK_BROADCAST) | \
+                            (1UL << SOCK_TIMESTAMP) | \
+                            (1UL << SOCK_DBG) | \
+                            (1UL << SOCK_RCVTSTAMP) | \
+                            (1UL << SOCK_RCVTSTAMPNS) | \
+                            (1UL << SOCK_LOCALROUTE) | \
+                            (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
+                            (1UL << SOCK_RXQ_OVFL) | \
+                            (1UL << SOCK_WIFI_STATUS) | \
+                            (1UL << SOCK_NOFCS) | \
+                            (1UL << SOCK_FILTER_LOCKED) | \
+                            (1UL << SOCK_TSTAMP_NEW))
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+                                    unsigned long mask)
+{
+       struct net *nnet = sock_net(nsk);
+
+       nsk->sk_userlocks = osk->sk_userlocks;
+       if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+               nsk->sk_sndbuf = osk->sk_sndbuf;
+       } else {
+               if (mask == SK_FLAGS_SMC_TO_CLC)
+                       WRITE_ONCE(nsk->sk_sndbuf,
+                                  READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+               else
+                       WRITE_ONCE(nsk->sk_sndbuf,
+                                  2 * READ_ONCE(nnet->smc.sysctl_wmem));
+       }
+       if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+               nsk->sk_rcvbuf = osk->sk_rcvbuf;
+       } else {
+               if (mask == SK_FLAGS_SMC_TO_CLC)
+                       WRITE_ONCE(nsk->sk_rcvbuf,
+                                  READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+               else
+                       WRITE_ONCE(nsk->sk_rcvbuf,
+                                  2 * READ_ONCE(nnet->smc.sysctl_rmem));
+       }
+}
+
 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
                                   unsigned long mask)
 {
        /* options we don't get control via setsockopt for */
        nsk->sk_type = osk->sk_type;
-       nsk->sk_sndbuf = osk->sk_sndbuf;
-       nsk->sk_rcvbuf = osk->sk_rcvbuf;
        nsk->sk_sndtimeo = osk->sk_sndtimeo;
        nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
        nsk->sk_mark = READ_ONCE(osk->sk_mark);
@@ -453,26 +500,10 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
 
        nsk->sk_flags &= ~mask;
        nsk->sk_flags |= osk->sk_flags & mask;
+
+       smc_adjust_sock_bufsizes(nsk, osk, mask);
 }
 
-#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
-                            (1UL << SOCK_KEEPOPEN) | \
-                            (1UL << SOCK_LINGER) | \
-                            (1UL << SOCK_BROADCAST) | \
-                            (1UL << SOCK_TIMESTAMP) | \
-                            (1UL << SOCK_DBG) | \
-                            (1UL << SOCK_RCVTSTAMP) | \
-                            (1UL << SOCK_RCVTSTAMPNS) | \
-                            (1UL << SOCK_LOCALROUTE) | \
-                            (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
-                            (1UL << SOCK_RXQ_OVFL) | \
-                            (1UL << SOCK_WIFI_STATUS) | \
-                            (1UL << SOCK_NOFCS) | \
-                            (1UL << SOCK_FILTER_LOCKED) | \
-                            (1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
 {
        smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -2479,8 +2510,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
                sock_hold(lsk); /* sock_put in smc_listen_work */
                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
                smc_copy_sock_settings_to_smc(new_smc);
-               new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
-               new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
                sock_hold(&new_smc->sk); /* sock_put in passive closing */
                if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
                        sock_put(&new_smc->sk);
index 2eeea4c..1f2b912 100644 (file)
@@ -161,7 +161,7 @@ struct smc_connection {
 
        struct smc_buf_desc     *sndbuf_desc;   /* send buffer descriptor */
        struct smc_buf_desc     *rmb_desc;      /* RMBE descriptor */
-       int                     rmbe_size_short;/* compressed notation */
+       int                     rmbe_size_comp; /* compressed notation */
        int                     rmbe_update_limit;
                                                /* lower limit for consumer
                                                 * cursor update
index b9b8b07..c90d9e5 100644 (file)
@@ -1007,7 +1007,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
                clc->d0.gid =
                        conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
                clc->d0.token = conn->rmb_desc->token;
-               clc->d0.dmbe_size = conn->rmbe_size_short;
+               clc->d0.dmbe_size = conn->rmbe_size_comp;
                clc->d0.dmbe_idx = 0;
                memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
                if (version == SMC_V1) {
@@ -1050,7 +1050,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
                        clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
                        break;
                }
-               clc->r0.rmbe_size = conn->rmbe_size_short;
+               clc->r0.rmbe_size = conn->rmbe_size_comp;
                clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
                        cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
                        cpu_to_be64((u64)sg_dma_address
index 3f465fa..6b78075 100644 (file)
@@ -2309,31 +2309,30 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        struct list_head *buf_list;
-       int bufsize, bufsize_short;
+       int bufsize, bufsize_comp;
        struct rw_semaphore *lock;      /* lock buffer list */
        bool is_dgraded = false;
-       int sk_buf_size;
 
        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
-               sk_buf_size = smc->sk.sk_rcvbuf;
+               bufsize = smc->sk.sk_rcvbuf / 2;
        else
                /* use socket send buffer size (w/o overhead) as start value */
-               sk_buf_size = smc->sk.sk_sndbuf;
+               bufsize = smc->sk.sk_sndbuf / 2;
 
-       for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
-            bufsize_short >= 0; bufsize_short--) {
+       for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
+            bufsize_comp >= 0; bufsize_comp--) {
                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
-                       buf_list = &lgr->rmbs[bufsize_short];
+                       buf_list = &lgr->rmbs[bufsize_comp];
                } else {
                        lock = &lgr->sndbufs_lock;
-                       buf_list = &lgr->sndbufs[bufsize_short];
+                       buf_list = &lgr->sndbufs[bufsize_comp];
                }
-               bufsize = smc_uncompress_bufsize(bufsize_short);
+               bufsize = smc_uncompress_bufsize(bufsize_comp);
 
                /* check for reusable slot in the link group */
-               buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
+               buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
                if (buf_desc) {
                        buf_desc->is_dma_need_sync = 0;
                        SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
@@ -2377,8 +2376,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
        if (is_rmb) {
                conn->rmb_desc = buf_desc;
-               conn->rmbe_size_short = bufsize_short;
-               smc->sk.sk_rcvbuf = bufsize;
+               conn->rmbe_size_comp = bufsize_comp;
+               smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit =
                        smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2386,7 +2385,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                        smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
        } else {
                conn->sndbuf_desc = buf_desc;
-               smc->sk.sk_sndbuf = bufsize;
+               smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
        return 0;
index b6f79fa..0b2a957 100644 (file)
 
 static int min_sndbuf = SMC_BUF_MIN_SIZE;
 static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+static int max_sndbuf = INT_MAX / 2;
+static int max_rcvbuf = INT_MAX / 2;
+static const int net_smc_wmem_init = (64 * 1024);
+static const int net_smc_rmem_init = (64 * 1024);
 
 static struct ctl_table smc_table[] = {
        {
@@ -53,6 +57,7 @@ static struct ctl_table smc_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &min_sndbuf,
+               .extra2         = &max_sndbuf,
        },
        {
                .procname       = "rmem",
@@ -61,6 +66,7 @@ static struct ctl_table smc_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &min_rcvbuf,
+               .extra2         = &max_rcvbuf,
        },
        {  }
 };
@@ -88,8 +94,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
        net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
        net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
        net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
-       WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
-       WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+       WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
+       WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
 
        return 0;
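
Taken together with the smc.c and smc_core.c changes above, the sizing now works in halves: the sysctl feeds sk_sndbuf/sk_rcvbuf with twice its value, and __smc_buf_create() halves it again to obtain the usable SMC buffer size. That also appears to be why the new extra2 clamps sit at INT_MAX / 2 — doubling anything larger would overflow an int — though that rationale is a reading of the patch rather than a statement from it. The arithmetic, assuming the 64 KiB default introduced here:

    #include <stdio.h>

    int main(void)
    {
            int sysctl_wmem = 64 * 1024;            /* net_smc_wmem_init above */
            int sk_sndbuf   = 2 * sysctl_wmem;      /* stored by smc_sock_alloc() */
            int smc_bufsize = sk_sndbuf / 2;        /* start value in __smc_buf_create() */

            printf("sysctl=%d sk_sndbuf=%d smc bufsize=%d\n",
                   sysctl_wmem, sk_sndbuf, smc_bufsize);
            return 0;
    }
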
 
index 2021fe5..529101e 100644 (file)
@@ -52,6 +52,8 @@ static LIST_HEAD(tls_device_list);
 static LIST_HEAD(tls_device_down_list);
 static DEFINE_SPINLOCK(tls_device_lock);
 
+static struct page *dummy_page;
+
 static void tls_device_free_ctx(struct tls_context *ctx)
 {
        if (ctx->tx_conf == TLS_HW) {
@@ -312,36 +314,33 @@ static int tls_push_record(struct sock *sk,
        return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
-static int tls_device_record_close(struct sock *sk,
-                                  struct tls_context *ctx,
-                                  struct tls_record_info *record,
-                                  struct page_frag *pfrag,
-                                  unsigned char record_type)
+static void tls_device_record_close(struct sock *sk,
+                                   struct tls_context *ctx,
+                                   struct tls_record_info *record,
+                                   struct page_frag *pfrag,
+                                   unsigned char record_type)
 {
        struct tls_prot_info *prot = &ctx->prot_info;
-       int ret;
+       struct page_frag dummy_tag_frag;
 
        /* append tag
         * device will fill in the tag, we just need to append a placeholder
         * use socket memory to improve coalescing (re-using a single buffer
         * increases frag count)
-        * if we can't allocate memory now, steal some back from data
+        * if we can't allocate memory now use the dummy page
         */
-       if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
-                                       sk->sk_allocation))) {
-               ret = 0;
-               tls_append_frag(record, pfrag, prot->tag_size);
-       } else {
-               ret = prot->tag_size;
-               if (record->len <= prot->overhead_size)
-                       return -ENOMEM;
+       if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) &&
+           !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) {
+               dummy_tag_frag.page = dummy_page;
+               dummy_tag_frag.offset = 0;
+               pfrag = &dummy_tag_frag;
        }
+       tls_append_frag(record, pfrag, prot->tag_size);
 
        /* fill prepend */
        tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
                         record->len - prot->overhead_size,
                         record_type);
-       return ret;
 }
 
 static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
@@ -541,18 +540,8 @@ last_record:
 
                if (done || record->len >= max_open_record_len ||
                    (record->num_frags >= MAX_SKB_FRAGS - 1)) {
-                       rc = tls_device_record_close(sk, tls_ctx, record,
-                                                    pfrag, record_type);
-                       if (rc) {
-                               if (rc > 0) {
-                                       size += rc;
-                               } else {
-                                       size = orig_size;
-                                       destroy_record(record);
-                                       ctx->open_record = NULL;
-                                       break;
-                               }
-                       }
+                       tls_device_record_close(sk, tls_ctx, record,
+                                               pfrag, record_type);
 
                        rc = tls_push_record(sk,
                                             tls_ctx,
@@ -1450,14 +1439,26 @@ int __init tls_device_init(void)
 {
        int err;
 
-       destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
-       if (!destruct_wq)
+       dummy_page = alloc_page(GFP_KERNEL);
+       if (!dummy_page)
                return -ENOMEM;
 
+       destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+       if (!destruct_wq) {
+               err = -ENOMEM;
+               goto err_free_dummy;
+       }
+
        err = register_netdevice_notifier(&tls_dev_notifier);
        if (err)
-               destroy_workqueue(destruct_wq);
+               goto err_destroy_wq;
 
+       return 0;
+
+err_destroy_wq:
+       destroy_workqueue(destruct_wq);
+err_free_dummy:
+       put_page(dummy_page);
        return err;
 }
 
@@ -1466,4 +1467,5 @@ void __exit tls_device_cleanup(void)
        unregister_netdevice_notifier(&tls_dev_notifier);
        destroy_workqueue(destruct_wq);
        clean_acked_data_flush();
+       put_page(dummy_page);
 }
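
tls_device_record_close() can no longer fail: when the current page frag cannot hold the tag and a refill is not possible, the placeholder simply points at the preallocated dummy_page — per the existing comment, the device fills in the real tag, so the placeholder's contents never matter. A compact sketch of that fallback shape, with made-up frag/pick_tag_frag types standing in for the kernel's page_frag handling:

    #include <stdbool.h>
    #include <stddef.h>

    struct frag { void *page; size_t offset; size_t size; };

    static void *dummy_page;        /* stands in for the page set up at init time */

    /* pick where the tag placeholder comes from; this path cannot fail */
    static struct frag *pick_tag_frag(struct frag *pfrag, struct frag *tmp,
                                      size_t tag_size, bool refill_ok)
    {
            if (pfrag->size - pfrag->offset >= tag_size || refill_ok)
                    return pfrag;           /* normal path: socket memory */

            tmp->page = dummy_page;         /* fallback: shared dummy page */
            tmp->offset = 0;
            tmp->size = tag_size;
            return tmp;
    }
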
index b689612..4a8ee2f 100644 (file)
@@ -139,9 +139,6 @@ int tls_push_sg(struct sock *sk,
 
        ctx->splicing_pages = true;
        while (1) {
-               if (sg_is_last(sg))
-                       msg.msg_flags = flags;
-
                /* is sending application-limited? */
                tcp_rate_check_app_limited(sk);
                p = sg_page(sg);
index 0da2e6a..8bcf8e2 100644 (file)
@@ -5430,8 +5430,11 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
        if (!wiphy->mbssid_max_interfaces)
                return ERR_PTR(-EINVAL);
 
-       nla_for_each_nested(nl_elems, attrs, rem_elems)
+       nla_for_each_nested(nl_elems, attrs, rem_elems) {
+               if (num_elems >= 255)
+                       return ERR_PTR(-EINVAL);
                num_elems++;
+       }
 
        elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
        if (!elems)
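
The parse loop now refuses more than 255 MBSSID elements instead of counting them all and allocating afterwards; the 255 looks like it comes from the element count being carried in an 8-bit field further along, though that is an inference rather than something stated in this hunk. The bound-while-counting shape, sketched with illustrative types:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define MBSSID_MAX_ELEMS 255    /* cap from the hunk above */

    struct elem_list {
            uint8_t cnt;            /* 8-bit count is an assumption, see note */
            const void *elem[];
    };

    static struct elem_list *parse_elems(const void *const *in, size_t n_in)
    {
            struct elem_list *out;
            size_t num = 0;

            for (size_t i = 0; i < n_in; i++) {
                    if (num >= MBSSID_MAX_ELEMS)
                            return NULL;    /* reject before sizing the allocation */
                    num++;
            }

            out = calloc(1, sizeof(*out) + num * sizeof(out->elem[0]));
            if (!out)
                    return NULL;
            for (size_t i = 0; i < num; i++)
                    out->elem[i] = in[i];
            out->cnt = (uint8_t)num;
            return out;
    }
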
index b89adb5..10ea85c 100644 (file)
@@ -994,6 +994,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                                err = xp_alloc_tx_descs(xs->pool, xs);
                                if (err) {
                                        xp_put_pool(xs->pool);
+                                       xs->pool = NULL;
                                        sockfd_put(sock);
                                        goto out_unlock;
                                }
index b72b82b..b348e16 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/sysctl.h>
 #include "internal.h"
 
-struct ctl_table key_sysctls[] = {
+static struct ctl_table key_sysctls[] = {
        {
                .procname = "maxkeys",
                .data = &key_quota_maxkeys,
index cb8ca46..1f6d904 100644 (file)
@@ -14,7 +14,7 @@
  * Defines x86 CPU feature bits
  */
 #define NCAPINTS                       21         /* N 32-bit words worth of info */
-#define NBUGINTS                       1          /* N 32-bit bug flags */
+#define NBUGINTS                       2          /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
index 3aedae6..a00a53e 100644 (file)
 #define MSR_AMD64_DE_CFG               0xc0011029
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT   1
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE      BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
 
 #define MSR_AMD64_BU_CFG2              0xc001102a
 #define MSR_AMD64_IBSFETCHCTL          0xc0011030
index 2e1caab..2d51fa8 100644 (file)
@@ -824,5 +824,8 @@ bool arch_is_retpoline(struct symbol *sym)
 
 bool arch_is_rethunk(struct symbol *sym)
 {
-       return !strcmp(sym->name, "__x86_return_thunk");
+       return !strcmp(sym->name, "__x86_return_thunk") ||
+              !strcmp(sym->name, "srso_untrain_ret") ||
+              !strcmp(sym->name, "srso_safe_ret") ||
+              !strcmp(sym->name, "__ret");
 }
index 4e62843..f4cb41e 100644 (file)
@@ -45,7 +45,6 @@
 
 static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
                                     struct thread *th, bool lock);
-static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms, u64 ip);
 
 static struct dso *machine__kernel_dso(struct machine *machine)
 {
@@ -2385,10 +2384,6 @@ static int add_callchain_ip(struct thread *thread,
        ms.maps = maps__get(al.maps);
        ms.map = map__get(al.map);
        ms.sym = al.sym;
-
-       if (!branch && append_inlines(cursor, &ms, ip) == 0)
-               goto out;
-
        srcline = callchain_srcline(&ms, al.addr);
        err = callchain_cursor_append(cursor, ip, &ms,
                                      branch, flags, nr_loop_iter,
index 7329b33..d45d5dc 100644 (file)
@@ -931,6 +931,11 @@ static bool should_skip_zero_counter(struct perf_stat_config *config,
         */
        if (config->aggr_mode == AGGR_THREAD && config->system_wide)
                return true;
+
+       /* Tool events have the software PMU but are only gathered on 1. */
+       if (evsel__is_tool(counter))
+               return true;
+
        /*
         * Skip value 0 when it's an uncore event and the given aggr id
         * does not belong to the PMU cpumask.
index a61c7bc..63f468b 100644 (file)
@@ -177,7 +177,7 @@ void regression1_test(void)
        nr_threads = 2;
        pthread_barrier_init(&worker_barrier, NULL, nr_threads);
 
-       threads = malloc(nr_threads * sizeof(pthread_t *));
+       threads = malloc(nr_threads * sizeof(*threads));
 
        for (i = 0; i < nr_threads; i++) {
                arg = i;
index b4f6f3a..5674a9d 100644 (file)
@@ -869,6 +869,77 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
        xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
 }
 
+static void redir_partial(int family, int sotype, int sock_map, int parser_map)
+{
+       int s, c0, c1, p0, p1;
+       int err, n, key, value;
+       char buf[] = "abc";
+
+       key = 0;
+       value = sizeof(buf) - 1;
+       err = xbpf_map_update_elem(parser_map, &key, &value, 0);
+       if (err)
+               return;
+
+       s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+       if (s < 0)
+               goto clean_parser_map;
+
+       err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+       if (err)
+               goto close_srv;
+
+       err = add_to_sockmap(sock_map, p0, p1);
+       if (err)
+               goto close;
+
+       n = xsend(c1, buf, sizeof(buf), 0);
+       if (n < sizeof(buf))
+               FAIL("incomplete write");
+
+       n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
+       if (n != sizeof(buf) - 1)
+               FAIL("expect %zu, received %d", sizeof(buf) - 1, n);
+
+close:
+       xclose(c0);
+       xclose(p0);
+       xclose(c1);
+       xclose(p1);
+close_srv:
+       xclose(s);
+
+clean_parser_map:
+       key = 0;
+       value = 0;
+       xbpf_map_update_elem(parser_map, &key, &value, 0);
+}
+
+static void test_skb_redir_partial(struct test_sockmap_listen *skel,
+                                  struct bpf_map *inner_map, int family,
+                                  int sotype)
+{
+       int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+       int parser = bpf_program__fd(skel->progs.prog_stream_parser);
+       int parser_map = bpf_map__fd(skel->maps.parser_map);
+       int sock_map = bpf_map__fd(inner_map);
+       int err;
+
+       err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+       if (err)
+               return;
+
+       err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+       if (err)
+               goto detach;
+
+       redir_partial(family, sotype, sock_map, parser_map);
+
+       xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+       xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
 static void test_reuseport_select_listening(int family, int sotype,
                                            int sock_map, int verd_map,
                                            int reuseport_prog)
@@ -1243,6 +1314,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
        } tests[] = {
                TEST(test_skb_redir_to_connected),
                TEST(test_skb_redir_to_listening),
+               TEST(test_skb_redir_partial),
                TEST(test_msg_redir_to_connected),
                TEST(test_msg_redir_to_listening),
        };
@@ -1432,7 +1504,7 @@ static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
        if (n < 1)
                goto out;
 
-       n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT);
+       n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
        if (n < 0)
                FAIL("%s: recv() err, errno=%d", log_prefix, errno);
        if (n == 0)
index 325c9f1..464d35b 100644 (file)
@@ -28,12 +28,26 @@ struct {
        __type(value, unsigned int);
 } verdict_map SEC(".maps");
 
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, int);
+       __type(value, int);
+} parser_map SEC(".maps");
+
 bool test_sockmap = false; /* toggled by user-space */
 bool test_ingress = false; /* toggled by user-space */
 
 SEC("sk_skb/stream_parser")
 int prog_stream_parser(struct __sk_buff *skb)
 {
+       int *value;
+       __u32 key = 0;
+
+       value = bpf_map_lookup_elem(&parser_map, &key);
+       if (value && *value)
+               return *value;
+
        return skb->len;
 }
 
index 258ddc5..1b2cec9 100644 (file)
@@ -70,6 +70,10 @@ static int test_kmem_basic(const char *root)
                goto cleanup;
 
        cg_write(cg, "memory.high", "1M");
+
+       /* wait for RCU freeing */
+       sleep(1);
+
        slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
        if (slab1 <= 0)
                goto cleanup;
index 435aceb..380b691 100644 (file)
@@ -831,6 +831,7 @@ int main(int argc, char *argv[])
                                printf("Size must be greater than 0\n");
                                return KSFT_FAIL;
                        }
+                       break;
                case 't':
                        {
                                int tmp = atoi(optarg);
index 0f5e88c..df8d90b 100755 (executable)
@@ -1981,6 +1981,11 @@ basic()
 
        run_cmd "$IP link set dev lo up"
 
+       # Dump should not loop endlessly when maximum nexthop ID is configured.
+       run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
+       run_cmd "timeout 5 $IP nexthop"
+       log_test $? 0 "Maximum nexthop ID dump"
+
        #
        # groups
        #
@@ -2201,6 +2206,11 @@ basic_res()
        run_cmd "$IP nexthop bucket list fdb"
        log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
 
+       # Dump should not loop endlessly when maximum nexthop ID is configured.
+       run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
+       run_cmd "timeout 5 $IP nexthop bucket"
+       log_test $? 0 "Maximum nexthop ID dump"
+
        #
        # resilient nexthop buckets get requests
        #
index ae3f946..d0c6c49 100755 (executable)
@@ -617,7 +617,7 @@ __cfg_test_port_ip_sg()
                grep -q "permanent"
        check_err $? "Entry not added as \"permanent\" when should"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_err $? "\"permanent\" entry has a pending group timer"
        bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -626,7 +626,7 @@ __cfg_test_port_ip_sg()
                grep -q "temp"
        check_err $? "Entry not added as \"temp\" when should"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_fail $? "\"temp\" entry has an unpending group timer"
        bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -659,7 +659,7 @@ __cfg_test_port_ip_sg()
                grep -q "permanent"
        check_err $? "Entry not marked as \"permanent\" after replace"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_err $? "Entry has a pending group timer after replace"
 
        bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
@@ -667,7 +667,7 @@ __cfg_test_port_ip_sg()
                grep -q "temp"
        check_err $? "Entry not marked as \"temp\" after replace"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_fail $? "Entry has an unpending group timer after replace"
        bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -850,6 +850,7 @@ cfg_test()
 __fwd_test_host_ip()
 {
        local grp=$1; shift
+       local dmac=$1; shift
        local src=$1; shift
        local mode=$1; shift
        local name
@@ -872,27 +873,27 @@ __fwd_test_host_ip()
        # Packet should only be flooded to multicast router ports when there is
        # no matching MDB entry. The bridge is not configured as a multicast
        # router port.
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 0
        check_err $? "Packet locally received after flood"
 
        # Install a regular port group entry and expect the packet to not be
        # locally received.
        bridge mdb add dev br0 port $swp2 grp $grp temp vid 10
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 0
        check_err $? "Packet locally received after installing a regular entry"
 
        # Add a host entry and expect the packet to be locally received.
        bridge mdb add dev br0 port br0 grp $grp temp vid 10
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 1
        check_err $? "Packet not locally received after adding a host entry"
 
        # Remove the host entry and expect the packet to not be locally
        # received.
        bridge mdb del dev br0 port br0 grp $grp vid 10
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 1
        check_err $? "Packet locally received after removing a host entry"
 
@@ -905,8 +906,8 @@ __fwd_test_host_ip()
 
 fwd_test_host_ip()
 {
-       __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4"
-       __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6"
+       __fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4"
+       __fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6"
 }
 
 fwd_test_host_l2()
@@ -966,6 +967,7 @@ fwd_test_host()
 __fwd_test_port_ip()
 {
        local grp=$1; shift
+       local dmac=$1; shift
        local valid_src=$1; shift
        local invalid_src=$1; shift
        local mode=$1; shift
@@ -999,43 +1001,43 @@ __fwd_test_port_ip()
                vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
                src_ip $invalid_src action drop
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 0
        check_err $? "Packet from valid source received on H2 before adding entry"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 0
        check_err $? "Packet from invalid source received on H2 before adding entry"
 
        bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
                filter_mode $filter_mode source_list $src_list
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 1
        check_err $? "Packet from valid source not received on H2 after adding entry"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 0
        check_err $? "Packet from invalid source received on H2 after adding entry"
 
        bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \
                filter_mode exclude
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 2
        check_err $? "Packet from valid source not received on H2 after allowing all sources"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 1
        check_err $? "Packet from invalid source not received on H2 after allowing all sources"
 
        bridge mdb del dev br0 port $swp2 grp $grp vid 10
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 2
        check_err $? "Packet from valid source received on H2 after deleting entry"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 1
        check_err $? "Packet from invalid source received on H2 after deleting entry"
 
@@ -1047,11 +1049,11 @@ __fwd_test_port_ip()
 
 fwd_test_port_ip()
 {
-       __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude"
-       __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+       __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude"
+       __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
                "exclude"
-       __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include"
-       __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+       __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include"
+       __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
                "include"
 }
 
@@ -1127,7 +1129,7 @@ ctrl_igmpv3_is_in_test()
                filter_mode include source_list 192.0.2.1
 
        # IS_IN ( 192.0.2.2 )
-       $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+       $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
                -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
        bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
@@ -1140,7 +1142,7 @@ ctrl_igmpv3_is_in_test()
                filter_mode include source_list 192.0.2.1
 
        # IS_IN ( 192.0.2.2 )
-       $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+       $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
                -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
        bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
@@ -1167,7 +1169,7 @@ ctrl_mldv2_is_in_test()
 
        # IS_IN ( 2001:db8:1::2 )
        local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
-       $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+       $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
                -t ip hop=1,next=0,p="$p" -q
 
        bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
@@ -1181,7 +1183,7 @@ ctrl_mldv2_is_in_test()
                filter_mode include source_list 2001:db8:1::1
 
        # IS_IN ( 2001:db8:1::2 )
-       $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+       $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
                -t ip hop=1,next=0,p="$p" -q
 
        bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
@@ -1206,6 +1208,11 @@ ctrl_test()
        ctrl_mldv2_is_in_test
 }
 
+if ! bridge mdb help 2>&1 | grep -q "replace"; then
+       echo "SKIP: iproute2 too old, missing bridge mdb replace support"
+       exit $ksft_skip
+fi
+
 trap cleanup EXIT
 
 setup_prepare
index ae255b6..3da9d93 100755 (executable)
@@ -252,7 +252,8 @@ ctl4_entries_add()
        local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
        local peer=$(locus_dev_peer $locus)
        local GRP=239.1.1.${grp}
-       $MZ $peer -c 1 -A 192.0.2.1 -B $GRP \
+       local dmac=01:00:5e:01:01:$(printf "%02x" $grp)
+       $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \
                -t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
        sleep 1
 
@@ -272,7 +273,8 @@ ctl4_entries_del()
 
        local peer=$(locus_dev_peer $locus)
        local GRP=239.1.1.${grp}
-       $MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+       local dmac=01:00:5e:00:00:02
+       $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \
                -t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
        sleep 1
        ! bridge mdb show dev br0 | grep -q $GRP
@@ -289,8 +291,10 @@ ctl6_entries_add()
        local peer=$(locus_dev_peer $locus)
        local SIP=fe80::1
        local GRP=ff0e::${grp}
+       local dmac=33:33:00:00:00:$(printf "%02x" $grp)
        local p=$(mldv2_is_in_get $SIP $GRP $IPs)
-       $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+       $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+               -t ip hop=1,next=0,p="$p" -q
        sleep 1
 
        local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
@@ -310,8 +314,10 @@ ctl6_entries_del()
        local peer=$(locus_dev_peer $locus)
        local SIP=fe80::1
        local GRP=ff0e::${grp}
+       local dmac=33:33:00:00:00:$(printf "%02x" $grp)
        local p=$(mldv1_done_get $SIP $GRP)
-       $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+       $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+               -t ip hop=1,next=0,p="$p" -q
        sleep 1
        ! bridge mdb show dev br0 | grep -q $GRP
 }
@@ -1328,6 +1334,11 @@ test_8021qvs()
        switch_destroy
 }
 
+if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
+       echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
+       exit $ksft_skip
+fi
+
 trap cleanup EXIT
 
 setup_prepare
index dbb9fcf..aa2eafb 100755 (executable)
@@ -286,6 +286,8 @@ different_speeds_autoneg_on()
        ethtool -s $h1 autoneg on
 }
 
+skip_on_veth
+
 trap cleanup EXIT
 
 setup_prepare
index c580ad6..39e736f 100755 (executable)
@@ -258,11 +258,6 @@ h2_destroy()
 
 setup_prepare()
 {
-       check_ethtool_mm_support
-       check_tc_fp_support
-       require_command lldptool
-       bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
-
        h1=${NETIFS[p1]}
        h2=${NETIFS[p2]}
 
@@ -278,6 +273,19 @@ cleanup()
        h1_destroy
 }
 
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+       ethtool --show-mm $netif 2>&1 &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: $netif does not support MAC Merge"
+               exit $ksft_skip
+       fi
+done
+
 trap cleanup EXIT
 
 setup_prepare
index eb9ec4a..7594bbb 100755 (executable)
@@ -99,6 +99,8 @@ test_stats_rx()
        test_stats g2a rx
 }
 
+skip_on_veth
+
 trap cleanup EXIT
 
 setup_prepare
index 9f5b3e2..49fa94b 100755 (executable)
@@ -14,6 +14,8 @@ ALL_TESTS="
 NUM_NETIFS=4
 source lib.sh
 
+require_command $TROUTE6
+
 h1_create()
 {
        simple_if_init $h1 2001:1:1::2/64
index 9ddb68d..f69015b 100755 (executable)
@@ -30,6 +30,7 @@ REQUIRE_MZ=${REQUIRE_MZ:=yes}
 REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
 STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
 TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
+TROUTE6=${TROUTE6:=traceroute6}
 
 relative_path="${BASH_SOURCE%/*}"
 if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -163,6 +164,17 @@ check_port_mab_support()
        fi
 }
 
+skip_on_veth()
+{
+       local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
+               jq -r '.[].linkinfo.info_kind')
+
+       if [[ $kind == veth ]]; then
+               echo "SKIP: Test cannot be run with veth pairs"
+               exit $ksft_skip
+       fi
+}
+
 if [[ "$(id -u)" -ne 0 ]]; then
        echo "SKIP: need root privileges"
        exit $ksft_skip
@@ -225,6 +237,11 @@ create_netif_veth()
        for ((i = 1; i <= NUM_NETIFS; ++i)); do
                local j=$((i+1))
 
+               if [ -z ${NETIFS[p$i]} ]; then
+                       echo "SKIP: Cannot create interface. Name not specified"
+                       exit $ksft_skip
+               fi
+
                ip link show dev ${NETIFS[p$i]} &> /dev/null
                if [[ $? -ne 0 ]]; then
                        ip link add ${NETIFS[p$i]} type veth \
diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings
new file mode 100644 (file)
index 0000000..e7b9417
--- /dev/null
@@ -0,0 +1 @@
+timeout=0
index a96cff8..b0f5e55 100755 (executable)
@@ -9,6 +9,8 @@ NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
 
+require_command ncat
+
 tcflags="skip_hw"
 
 h1_create()
@@ -220,9 +222,9 @@ mirred_egress_to_ingress_tcp_test()
                ip_proto icmp \
                        action drop
 
-       ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2  &
+       ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
        local rpid=$!
-       ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
+       ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
        wait -n $rpid
        cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
        check_err $? "server output check failed"
index 683711f..b1daad1 100755 (executable)
@@ -52,8 +52,8 @@ match_dst_mac_test()
        tc_check_packets "dev $h2 ingress" 101 1
        check_fail $? "Matched on a wrong filter"
 
-       tc_check_packets "dev $h2 ingress" 102 1
-       check_err $? "Did not match on correct filter"
+       tc_check_packets "dev $h2 ingress" 102 0
+       check_fail $? "Did not match on correct filter"
 
        tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
        tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -78,8 +78,8 @@ match_src_mac_test()
        tc_check_packets "dev $h2 ingress" 101 1
        check_fail $? "Matched on a wrong filter"
 
-       tc_check_packets "dev $h2 ingress" 102 1
-       check_err $? "Did not match on correct filter"
+       tc_check_packets "dev $h2 ingress" 102 0
+       check_fail $? "Did not match on correct filter"
 
        tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
        tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
index e22c2d2..20a7cb7 100755 (executable)
@@ -127,6 +127,7 @@ test_l2_miss_multicast_common()
        local proto=$1; shift
        local sip=$1; shift
        local dip=$1; shift
+       local dmac=$1; shift
        local mode=$1; shift
        local name=$1; shift
 
@@ -142,7 +143,7 @@ test_l2_miss_multicast_common()
           action pass
 
        # Before adding MDB entry.
-       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
 
        tc_check_packets "dev $swp2 egress" 101 1
        check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
@@ -153,7 +154,7 @@ test_l2_miss_multicast_common()
        # Adding MDB entry.
        bridge mdb replace dev br1 port $swp2 grp $dip permanent
 
-       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
 
        tc_check_packets "dev $swp2 egress" 101 1
        check_err $? "Unregistered multicast filter was hit after adding MDB entry"
@@ -164,7 +165,7 @@ test_l2_miss_multicast_common()
        # Deleting MDB entry.
        bridge mdb del dev br1 port $swp2 grp $dip
 
-       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
 
        tc_check_packets "dev $swp2 egress" 101 2
        check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
@@ -183,10 +184,11 @@ test_l2_miss_multicast_ipv4()
        local proto="ipv4"
        local sip=192.0.2.1
        local dip=239.1.1.1
+       local dmac=01:00:5e:01:01:01
        local mode="-4"
        local name="IPv4"
 
-       test_l2_miss_multicast_common $proto $sip $dip $mode $name
+       test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
 }
 
 test_l2_miss_multicast_ipv6()
@@ -194,10 +196,11 @@ test_l2_miss_multicast_ipv6()
        local proto="ipv6"
        local sip=2001:db8:1::1
        local dip=ff0e::1
+       local dmac=33:33:00:00:00:01
        local mode="-6"
        local name="IPv6"
 
-       test_l2_miss_multicast_common $proto $sip $dip $mode $name
+       test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
 }
 
 test_l2_miss_multicast()
index 5ac184d..5a5dd90 100755 (executable)
@@ -104,11 +104,14 @@ tunnel_key_nofrag_test()
        local i
 
        tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \
-               flower ip_flags nofrag action drop
+               flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+               ip_flags nofrag action drop
        tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \
-               flower ip_flags firstfrag action drop
+               flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+               ip_flags firstfrag action drop
        tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \
-               flower ip_flags nofirstfrag action drop
+               flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+               ip_flags nofirstfrag action drop
 
        # test 'nofrag' set
        tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \
index 3c2096a..d01b73a 100755 (executable)
@@ -705,6 +705,7 @@ pm_nl_del_endpoint()
        local addr=$3
 
        if [ $ip_mptcp -eq 1 ]; then
+               [ $id -ne 0 ] && addr=''
                ip -n $ns mptcp endpoint delete id $id $addr
        else
                ip netns exec $ns ./pm_nl_ctl del $id $addr
@@ -795,10 +796,11 @@ pm_nl_check_endpoint()
        fi
 
        if [ $ip_mptcp -eq 1 ]; then
+               # get line and trim trailing whitespace
                line=$(ip -n $ns mptcp endpoint show $id)
+               line="${line% }"
                # the dump order is: address id flags port dev
-               expected_line="$addr"
-               [ -n "$addr" ] && expected_line="$expected_line $addr"
+               [ -n "$addr" ] && expected_line="$addr"
                expected_line="$expected_line $id"
                [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
                [ -n "$dev" ] && expected_line="$expected_line $dev"
index dfe3d28..f838dd3 100755 (executable)
@@ -361,6 +361,7 @@ err_buf=
 tcpdump_pids=
 nettest_pids=
 socat_pids=
+tmpoutfile=
 
 err() {
        err_buf="${err_buf}${1}
@@ -951,6 +952,7 @@ cleanup() {
        ip link del veth_A-R1                   2>/dev/null
        ovs-vsctl --if-exists del-port vxlan_a  2>/dev/null
        ovs-vsctl --if-exists del-br ovs_br0    2>/dev/null
+       rm -f "$tmpoutfile"
 }
 
 mtu() {
@@ -1328,6 +1330,39 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
        check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
        check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+
+       tmpoutfile=$(mktemp)
+
+       # Flush Exceptions, retry with TCP
+       run_cmd ${ns_a} ip route flush cached ${dst}
+       run_cmd ${ns_b} ip route flush cached ${dst}
+       run_cmd ${ns_c} ip route flush cached ${dst}
+
+       for target in "${ns_a}" "${ns_c}" ; do
+               if [ ${family} -eq 4 ]; then
+                       TCPDST=TCP:${dst}:50000
+               else
+                       TCPDST="TCP:[${dst}]:50000"
+               fi
+               ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile &
+
+               sleep 1
+
+               dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+
+               size=$(du -sb $tmpoutfile)
+               size=${size%%/tmp/*}
+
+               [ $size -ne 1048576 ] && err "File size $size does not match expected value in locally bridged vxlan test" && return 1
+       done
+
+       rm -f "$tmpoutfile"
+
+       # Check that exceptions were created
+       pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+       check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface"
+       pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+       check_pmtu_value ${exp_mtu} "${pmtu}" "tcp exceeding link layer MTU on locally bridged ${type} interface"
 }
 
 test_pmtu_ipv4_br_vxlan4_exception() {
index b357ba2..7a957c7 100644 (file)
@@ -4,8 +4,10 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
 CLANG_FLAGS += -no-integrated-as
 endif
 
+top_srcdir = ../../../..
+
 CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \
-         $(CLANG_FLAGS)
+         $(CLANG_FLAGS) -I$(top_srcdir)/tools/include
 LDLIBS += -lpthread -ldl
 
 # Own dependencies because we only want to build against 1st prerequisite, but
index a723da2..96e812b 100644 (file)
@@ -31,6 +31,8 @@
 #include <sys/auxv.h>
 #include <linux/auxvec.h>
 
+#include <linux/compiler.h>
+
 #include "../kselftest.h"
 #include "rseq.h"