Merge tag 'pm-4.16-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Mar 2018 16:17:49 +0000 (08:17 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Mar 2018 16:17:49 +0000 (08:17 -0800)
Pull power management fixes from Rafael Wysocki:
 "These fix three issues in cpufreq drivers: one recent regression, one
  leftover Kconfig dependency and one old but "stable" material.

  Specifics:

   - Make the task scheduler load and utilization signals be
     frequency-invariant again after recent changes in the SCPI cpufreq
     driver (Dietmar Eggemann).

   - Drop an unnecessary leftover Kconfig dependency from the SCPI
     cpufreq driver (Sudeep Holla).

   - Fix the initialization of the s3c24xx cpufreq driver (Viresh
     Kumar)"

* tag 'pm-4.16-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: s3c24xx: Fix broken s3c_cpufreq_init()
  cpufreq: scpi: Fix incorrect arm_big_little config dependency
  cpufreq: scpi: invoke frequency-invariance setter function

213 files changed:
Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt [moved from Documentation/devicetree/bindings/misc/arm-charlcd.txt with 100% similarity]
Documentation/devicetree/bindings/power/wakeup-source.txt
Documentation/devicetree/bindings/thermal/imx-thermal.txt
Documentation/virtual/kvm/cpuid.txt
Documentation/virtual/kvm/msr.txt
MAINTAINERS
Makefile
arch/arc/Kconfig
arch/arc/boot/dts/axs101.dts
arch/arc/boot/dts/axs10x_mb.dtsi
arch/arc/boot/dts/haps_hs_idu.dts
arch/arc/boot/dts/nsim_700.dts
arch/arc/boot/dts/nsim_hs.dts
arch/arc/boot/dts/nsim_hs_idu.dts
arch/arc/boot/dts/nsimosci.dts
arch/arc/boot/dts/nsimosci_hs.dts
arch/arc/boot/dts/nsimosci_hs_idu.dts
arch/arc/include/asm/entry-arcv2.h
arch/arc/kernel/mcip.c
arch/arc/kernel/setup.c
arch/arc/kernel/smp.c
arch/arc/mm/cache.c
arch/arm/boot/dts/bcm11351.dtsi
arch/arm/boot/dts/bcm21664.dtsi
arch/arm/boot/dts/bcm2835.dtsi
arch/arm/boot/dts/bcm2836.dtsi
arch/arm/boot/dts/bcm2837.dtsi
arch/arm/boot/dts/bcm283x.dtsi
arch/arm/boot/dts/bcm958625hr.dts
arch/arm/boot/dts/gemini-dlink-dns-313.dts
arch/arm/boot/dts/imx6dl-icore-rqs.dts
arch/arm/boot/dts/logicpd-som-lv.dtsi
arch/arm/boot/dts/logicpd-torpedo-som.dtsi
arch/arm/boot/dts/omap5-uevm.dts
arch/arm/boot/dts/rk3036.dtsi
arch/arm/boot/dts/rk322x.dtsi
arch/arm/boot/dts/rk3288-phycore-som.dtsi
arch/arm/boot/dts/zx296702.dtsi
arch/arm/configs/omap2plus_defconfig
arch/arm/kvm/hyp/Makefile
arch/arm/kvm/hyp/banked-sr.c
arch/arm/mach-clps711x/board-dt.c
arch/arm/mach-davinci/board-dm355-evm.c
arch/arm/mach-davinci/board-dm355-leopard.c
arch/arm/mach-davinci/board-dm365-evm.c
arch/arm/mach-mvebu/Kconfig
arch/arm/mach-omap1/clock.c
arch/arm/mach-omap2/omap-wakeupgen.c
arch/arm/mach-omap2/omap_hwmod.c
arch/arm/mach-omap2/pm.c
arch/arm/mach-omap2/timer.c
arch/arm/plat-orion/common.c
arch/arm64/boot/dts/amlogic/meson-axg.dtsi
arch/arm64/boot/dts/amlogic/meson-gx.dtsi
arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi
arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
arch/arm64/boot/dts/mediatek/mt8173.dtsi
arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
arch/arm64/boot/dts/qcom/msm8996.dtsi
arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
arch/arm64/boot/dts/rockchip/rk3328.dtsi
arch/arm64/boot/dts/rockchip/rk3368.dtsi
arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi
arch/arm64/boot/dts/rockchip/rk3399.dtsi
arch/riscv/include/asm/barrier.h
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/s390/kvm/vsie.c
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/include/asm/apm.h
arch/x86/include/asm/asm-prototypes.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/efi.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/microcode.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/refcount.h
arch/x86/include/asm/rmwcc.h
arch/x86/include/uapi/asm/kvm_para.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/microcode/core.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/head_64.S
arch/x86/kernel/kvm.c
arch/x86/kernel/unwind_orc.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/Makefile
arch/x86/lib/retpoline.S
arch/x86/mm/fault.c
arch/x86/mm/mem_encrypt_boot.S
arch/x86/realmode/rm/trampoline_64.S
drivers/bus/ti-sysc.c
drivers/char/tpm/st33zp24/st33zp24.c
drivers/char/tpm/tpm-interface.c
drivers/char/tpm/tpm2-cmd.c
drivers/char/tpm/tpm_i2c_infineon.c
drivers/char/tpm/tpm_i2c_nuvoton.c
drivers/char/tpm/tpm_tis_core.c
drivers/crypto/ccp/psp-dev.c
drivers/edac/sb_edac.c
drivers/gpio/gpiolib-of.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/core/dc_stream.c
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
drivers/gpu/drm/drm_framebuffer.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_audio.c
drivers/gpu/drm/nouveau/nv50_display.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_pm.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/drm/virtio/virtgpu_ioctl.c
drivers/md/md-multipath.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid1.c
drivers/md/raid1.h
drivers/md/raid10.c
drivers/md/raid10.h
drivers/md/raid5-log.h
drivers/md/raid5-ppl.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/memory/brcmstb_dpfe.c
drivers/pinctrl/meson/pinctrl-meson-axg.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/intel-vbtn.c
drivers/platform/x86/wmi.c
drivers/soc/imx/gpc.c
fs/xfs/scrub/agheader.c
fs/xfs/xfs_refcount_item.c
fs/xfs/xfs_rmap_item.c
fs/xfs/xfs_super.c
include/drm/drm_drv.h
include/linux/compiler-clang.h
include/linux/compiler-gcc.h
include/linux/init.h
include/linux/jump_label.h
include/linux/kernel.h
include/linux/kvm_host.h
include/linux/nospec.h
include/soc/arc/mcip.h
include/uapi/drm/virtgpu_drm.h
include/uapi/linux/psp-sev.h
init/main.c
kernel/extable.c
kernel/jump_label.c
kernel/printk/printk.c
lib/dma-debug.c
lib/idr.c
samples/seccomp/Makefile
scripts/Makefile.build
sound/core/control.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_realtek.c
sound/usb/quirks-table.h
sound/x86/intel_hdmi_audio.c
tools/kvm/kvm_stat/kvm_stat
tools/kvm/kvm_stat/kvm_stat.txt
tools/objtool/builtin-check.c
tools/objtool/builtin-orc.c
tools/objtool/builtin.h
tools/objtool/check.c
tools/objtool/check.h
tools/testing/radix-tree/idr-test.c
tools/testing/radix-tree/linux.c
tools/testing/radix-tree/linux/compiler_types.h [new file with mode: 0644]
tools/testing/radix-tree/linux/gfp.h
tools/testing/radix-tree/linux/slab.h
tools/testing/selftests/android/Makefile
tools/testing/selftests/futex/Makefile
tools/testing/selftests/memfd/config [new file with mode: 0644]
tools/testing/selftests/memory-hotplug/Makefile
tools/testing/selftests/pstore/config
tools/testing/selftests/sync/Makefile
tools/testing/selftests/vDSO/Makefile
tools/testing/selftests/vm/.gitignore
virt/kvm/arm/arch_timer.c
virt/kvm/kvm_main.c

index 3c81f78..5d254ab 100644 (file)
@@ -60,7 +60,7 @@ Examples
                #size-cells = <0>;
 
                button@1 {
-                       debounce_interval = <50>;
+                       debounce-interval = <50>;
                        wakeup-source;
                        linux,code = <116>;
                        label = "POWER";
index 28be51a..379eb76 100644 (file)
@@ -22,7 +22,32 @@ Optional properties:
 - clocks : thermal sensor's clock source.
 
 Example:
+ocotp: ocotp@21bc000 {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "fsl,imx6sx-ocotp", "syscon";
+       reg = <0x021bc000 0x4000>;
+       clocks = <&clks IMX6SX_CLK_OCOTP>;
 
+       tempmon_calib: calib@38 {
+               reg = <0x38 4>;
+       };
+
+       tempmon_temp_grade: temp-grade@20 {
+               reg = <0x20 4>;
+       };
+};
+
+tempmon: tempmon {
+       compatible = "fsl,imx6sx-tempmon", "fsl,imx6q-tempmon";
+       interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
+       fsl,tempmon = <&anatop>;
+       nvmem-cells = <&tempmon_calib>, <&tempmon_temp_grade>;
+       nvmem-cell-names = "calib", "temp_grade";
+       clocks = <&clks IMX6SX_CLK_PLL3_USB_OTG>;
+};
+
+Legacy method (Deprecated):
 tempmon {
        compatible = "fsl,imx6q-tempmon";
        fsl,tempmon = <&anatop>;
index dcab6dc..87a7506 100644 (file)
@@ -58,6 +58,10 @@ KVM_FEATURE_PV_TLB_FLUSH           ||     9 || guest checks this feature bit
                                    ||       || before enabling paravirtualized
                                    ||       || tlb flush.
 ------------------------------------------------------------------------------
+KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
+                                   ||       || can be enabled by setting bit 2
+                                   ||       || when writing to msr 0x4b564d02
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
index 1ebecc1..f3f0d57 100644 (file)
@@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
        when asynchronous page faults are enabled on the vcpu 0 when
        disabled. Bit 1 is 1 if asynchronous page faults can be injected
        when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
-       are delivered to L1 as #PF vmexits.
+       are delivered to L1 as #PF vmexits.  Bit 2 can be set only if
+       KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
 
        First 4 byte of 64 byte memory location will be written to by
        the hypervisor at the time of asynchronous page fault (APF)
index 93a12af..1c95c60 100644 (file)
@@ -1238,7 +1238,7 @@ F:        drivers/clk/at91
 
 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
-M:     Alexandre Belloni <alexandre.belloni@free-electrons.com>
+M:     Alexandre Belloni <alexandre.belloni@bootlin.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.linux4sam.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/nferre/linux-at91.git
@@ -1590,7 +1590,7 @@ ARM/Marvell Dove/MV78xx0/Orion SOC support
 M:     Jason Cooper <jason@lakedaemon.net>
 M:     Andrew Lunn <andrew@lunn.ch>
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
-M:     Gregory Clement <gregory.clement@free-electrons.com>
+M:     Gregory Clement <gregory.clement@bootlin.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/soc/dove/
@@ -1604,7 +1604,7 @@ F:        arch/arm/boot/dts/orion5x*
 ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K SOC support
 M:     Jason Cooper <jason@lakedaemon.net>
 M:     Andrew Lunn <andrew@lunn.ch>
-M:     Gregory Clement <gregory.clement@free-electrons.com>
+M:     Gregory Clement <gregory.clement@bootlin.com>
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
@@ -1999,8 +1999,10 @@ M:       Maxime Coquelin <mcoquelin.stm32@gmail.com>
 M:     Alexandre Torgue <alexandre.torgue@st.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mcoquelin/stm32.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32.git stm32-next
 N:     stm32
+F:     arch/arm/boot/dts/stm32*
+F:     arch/arm/mach-stm32/
 F:     drivers/clocksource/armv7m_systick.c
 
 ARM/TANGO ARCHITECTURE
@@ -10926,6 +10928,17 @@ L:     linux-gpio@vger.kernel.org
 S:     Supported
 F:     drivers/pinctrl/pinctrl-at91-pio4.*
 
+PIN CONTROLLER - FREESCALE
+M:     Dong Aisheng <aisheng.dong@nxp.com>
+M:     Fabio Estevam <festevam@gmail.com>
+M:     Shawn Guo <shawnguo@kernel.org>
+M:     Stefan Agner <stefan@agner.ch>
+R:     Pengutronix Kernel Team <kernel@pengutronix.de>
+L:     linux-gpio@vger.kernel.org
+S:     Maintained
+F:     drivers/pinctrl/freescale/
+F:     Documentation/devicetree/bindings/pinctrl/fsl,*
+
 PIN CONTROLLER - INTEL
 M:     Mika Westerberg <mika.westerberg@linux.intel.com>
 M:     Heikki Krogerus <heikki.krogerus@linux.intel.com>
index 659a778..fb94072 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -489,6 +489,11 @@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 endif
 
+RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
+export RETPOLINE_CFLAGS
+
 ifeq ($(config-targets),1)
 # ===========================================================================
 # *config targets only - make sure prerequisites are updated, and descend
index f3a80cf..d76bf4a 100644 (file)
@@ -484,7 +484,6 @@ config ARC_CURR_IN_REG
 
 config ARC_EMUL_UNALIGNED
        bool "Emulate unaligned memory access (userspace only)"
-       default N
        select SYSCTL_ARCH_UNALIGN_NO_WARN
        select SYSCTL_ARCH_UNALIGN_ALLOW
        depends on ISA_ARCOMPACT
index 70aec7d..626b694 100644 (file)
@@ -17,6 +17,6 @@
        compatible = "snps,axs101", "snps,arc-sdp";
 
        chosen {
-               bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60";
+               bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60 print-fatal-signals=1";
        };
 };
index 74d070c..47b74fb 100644 (file)
                        };
 
                        eeprom@0x54{
-                               compatible = "24c01";
+                               compatible = "atmel,24c01";
                                reg = <0x54>;
                                pagesize = <0x8>;
                        };
 
                        eeprom@0x57{
-                               compatible = "24c04";
+                               compatible = "atmel,24c04";
                                reg = <0x57>;
                                pagesize = <0x8>;
                        };
index 215cddd..0c60330 100644 (file)
@@ -22,7 +22,7 @@
        };
 
        chosen {
-               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug";
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
        };
 
        aliases {
index 5ee96b0..ff2f2c7 100644 (file)
@@ -17,7 +17,7 @@
        interrupt-parent = <&core_intc>;
 
        chosen {
-               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
        };
 
        aliases {
index 8d787b2..8e2489b 100644 (file)
@@ -24,7 +24,7 @@
        };
 
        chosen {
-               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
        };
 
        aliases {
index 4f98ebf..ed12f49 100644 (file)
@@ -15,7 +15,7 @@
        interrupt-parent = <&core_intc>;
 
        chosen {
-               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
+               bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1";
        };
 
        aliases {
index 3c391ba..7842e5e 100644 (file)
@@ -20,7 +20,7 @@
                /* this is for console on PGU */
                /* bootargs = "console=tty0 consoleblank=0"; */
                /* this is for console on serial */
-               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24";
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1";
        };
 
        aliases {
index 14a727c..b8838cf 100644 (file)
@@ -20,7 +20,7 @@
                /* this is for console on PGU */
                /* bootargs = "console=tty0 consoleblank=0"; */
                /* this is for console on serial */
-               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24";
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1";
        };
 
        aliases {
index 5052917..72a2c72 100644 (file)
@@ -18,7 +18,7 @@
 
        chosen {
                /* this is for console on serial */
-               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24";
+               bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24 print-fatal-signals=1";
        };
 
        aliases {
index 257a68f..309f4e6 100644 (file)
 .macro FAKE_RET_FROM_EXCPN
        lr      r9, [status32]
        bic     r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK)
-       or      r9, r9, (STATUS_L_MASK|STATUS_IE_MASK)
+       or      r9, r9, STATUS_IE_MASK
        kflag   r9
 .endm
 
index f61a52b..5fe84e4 100644 (file)
@@ -22,10 +22,79 @@ static DEFINE_RAW_SPINLOCK(mcip_lock);
 
 static char smp_cpuinfo_buf[128];
 
+/*
+ * Set mask to halt GFRC if any online core in SMP cluster is halted.
+ * Only works for ARC HS v3.0+, on earlier versions has no effect.
+ */
+static void mcip_update_gfrc_halt_mask(int cpu)
+{
+       struct bcr_generic gfrc;
+       unsigned long flags;
+       u32 gfrc_halt_mask;
+
+       READ_BCR(ARC_REG_GFRC_BUILD, gfrc);
+
+       /*
+        * CMD_GFRC_SET_CORE and CMD_GFRC_READ_CORE commands were added in
+        * GFRC 0x3 version.
+        */
+       if (gfrc.ver < 0x3)
+               return;
+
+       raw_spin_lock_irqsave(&mcip_lock, flags);
+
+       __mcip_cmd(CMD_GFRC_READ_CORE, 0);
+       gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK);
+       gfrc_halt_mask |= BIT(cpu);
+       __mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask);
+
+       raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void mcip_update_debug_halt_mask(int cpu)
+{
+       u32 mcip_mask = 0;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&mcip_lock, flags);
+
+       /*
+        * mcip_mask is same for CMD_DEBUG_SET_SELECT and CMD_DEBUG_SET_MASK
+        * commands. So read it once instead of reading both CMD_DEBUG_READ_MASK
+        * and CMD_DEBUG_READ_SELECT.
+        */
+       __mcip_cmd(CMD_DEBUG_READ_SELECT, 0);
+       mcip_mask = read_aux_reg(ARC_REG_MCIP_READBACK);
+
+       mcip_mask |= BIT(cpu);
+
+       __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, mcip_mask);
+       /*
+        * Parameter specified halt cause:
+        * STATUS32[H]/actionpoint/breakpoint/self-halt
+        * We choose all of them (0xF).
+        */
+       __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xF, mcip_mask);
+
+       raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
 static void mcip_setup_per_cpu(int cpu)
 {
+       struct mcip_bcr mp;
+
+       READ_BCR(ARC_REG_MCIP_BCR, mp);
+
        smp_ipi_irq_setup(cpu, IPI_IRQ);
        smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ);
+
+       /* Update GFRC halt mask as new CPU came online */
+       if (mp.gfrc)
+               mcip_update_gfrc_halt_mask(cpu);
+
+       /* Update MCIP debug mask as new CPU came online */
+       if (mp.dbg)
+               mcip_update_debug_halt_mask(cpu);
 }
 
 static void mcip_ipi_send(int cpu)
@@ -101,11 +170,6 @@ static void mcip_probe_n_setup(void)
                IS_AVAIL1(mp.gfrc, "GFRC"));
 
        cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
-
-       if (mp.dbg) {
-               __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf);
-               __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf);
-       }
 }
 
 struct plat_smp_ops plat_smp_ops = {
index ec12fe1..b2cae79 100644 (file)
@@ -51,7 +51,7 @@ static const struct id_to_str arc_cpu_rel[] = {
        { 0x51, "R2.0" },
        { 0x52, "R2.1" },
        { 0x53, "R3.0" },
-       { 0x54, "R4.0" },
+       { 0x54, "R3.10a" },
 #endif
        { 0x00, NULL   }
 };
index efe8b42..21d86c3 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/reboot.h>
 #include <linux/irqdomain.h>
 #include <linux/export.h>
+#include <linux/of_fdt.h>
 
 #include <asm/processor.h>
 #include <asm/setup.h>
@@ -47,6 +48,42 @@ void __init smp_prepare_boot_cpu(void)
 {
 }
 
+static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask)
+{
+       unsigned long dt_root = of_get_flat_dt_root();
+       const char *buf;
+
+       buf = of_get_flat_dt_prop(dt_root, name, NULL);
+       if (!buf)
+               return -EINVAL;
+
+       if (cpulist_parse(buf, cpumask))
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Read from DeviceTree and setup cpu possible mask. If there is no
+ * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist.
+ */
+static void __init arc_init_cpu_possible(void)
+{
+       struct cpumask cpumask;
+
+       if (arc_get_cpu_map("possible-cpus", &cpumask)) {
+               pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n",
+                       NR_CPUS);
+
+               cpumask_setall(&cpumask);
+       }
+
+       if (!cpumask_test_cpu(0, &cpumask))
+               panic("Master cpu (cpu[0]) is missed in cpu possible mask!");
+
+       init_cpu_possible(&cpumask);
+}
+
 /*
  * Called from setup_arch() before calling setup_processor()
  *
@@ -58,10 +95,7 @@ void __init smp_prepare_boot_cpu(void)
  */
 void __init smp_init_cpus(void)
 {
-       unsigned int i;
-
-       for (i = 0; i < NR_CPUS; i++)
-               set_cpu_possible(i, true);
+       arc_init_cpu_possible();
 
        if (plat_smp_ops.init_early_smp)
                plat_smp_ops.init_early_smp();
@@ -70,16 +104,12 @@ void __init smp_init_cpus(void)
 /* called from init ( ) =>  process 1 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-       int i;
-
        /*
         * if platform didn't set the present map already, do it now
         * boot cpu is set to present already by init/main.c
         */
-       if (num_present_cpus() <= 1) {
-               for (i = 0; i < max_cpus; i++)
-                       set_cpu_present(i, true);
-       }
+       if (num_present_cpus() <= 1)
+               init_cpu_present(cpu_possible_mask);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
index eee924d..2072f34 100644 (file)
@@ -780,7 +780,10 @@ noinline static void slc_entire_op(const int op)
 
        write_aux_reg(r, ctrl);
 
-       write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
+       if (op & OP_INV)        /* Inv or flush-n-inv use same cmd reg */
+               write_aux_reg(ARC_REG_SLC_INVALIDATE, 0x1);
+       else
+               write_aux_reg(ARC_REG_SLC_FLUSH, 0x1);
 
        /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */
        read_aux_reg(r);
index 18045c3..db7cded 100644 (file)
@@ -55,7 +55,7 @@
                      <0x3ff00100 0x100>;
        };
 
-       smc@0x3404c000 {
+       smc@3404c000 {
                compatible = "brcm,bcm11351-smc", "brcm,kona-smc";
                reg = <0x3404c000 0x400>; /* 1 KiB in SRAM */
        };
index 6dde95f..266f261 100644 (file)
@@ -55,7 +55,7 @@
                      <0x3ff00100 0x100>;
        };
 
-       smc@0x3404e000 {
+       smc@3404e000 {
                compatible = "brcm,bcm21664-smc", "brcm,kona-smc";
                reg = <0x3404e000 0x400>; /* 1 KiB in SRAM */
        };
index 0e3d2a5..a5c3824 100644 (file)
        soc {
                ranges = <0x7e000000 0x20000000 0x02000000>;
                dma-ranges = <0x40000000 0x00000000 0x20000000>;
+       };
 
-               arm-pmu {
-                       compatible = "arm,arm1176-pmu";
-               };
+       arm-pmu {
+               compatible = "arm,arm1176-pmu";
        };
 };
 
index 1dfd764..c933e84 100644 (file)
@@ -9,19 +9,19 @@
                         <0x40000000 0x40000000 0x00001000>;
                dma-ranges = <0xc0000000 0x00000000 0x3f000000>;
 
-               local_intc: local_intc {
+               local_intc: local_intc@40000000 {
                        compatible = "brcm,bcm2836-l1-intc";
                        reg = <0x40000000 0x100>;
                        interrupt-controller;
                        #interrupt-cells = <2>;
                        interrupt-parent = <&local_intc>;
                };
+       };
 
-               arm-pmu {
-                       compatible = "arm,cortex-a7-pmu";
-                       interrupt-parent = <&local_intc>;
-                       interrupts = <9 IRQ_TYPE_LEVEL_HIGH>;
-               };
+       arm-pmu {
+               compatible = "arm,cortex-a7-pmu";
+               interrupt-parent = <&local_intc>;
+               interrupts = <9 IRQ_TYPE_LEVEL_HIGH>;
        };
 
        timer {
index efa7d33..7704bb0 100644 (file)
@@ -8,7 +8,7 @@
                         <0x40000000 0x40000000 0x00001000>;
                dma-ranges = <0xc0000000 0x00000000 0x3f000000>;
 
-               local_intc: local_intc {
+               local_intc: local_intc@40000000 {
                        compatible = "brcm,bcm2836-l1-intc";
                        reg = <0x40000000 0x100>;
                        interrupt-controller;
index 18db25a..9d293de 100644 (file)
                        status = "disabled";
                };
 
-               aux: aux@0x7e215000 {
+               aux: aux@7e215000 {
                        compatible = "brcm,bcm2835-aux";
                        #clock-cells = <1>;
                        reg = <0x7e215000 0x8>;
index 6a44b80..f0e2008 100644 (file)
@@ -49,7 +49,7 @@
 
        memory {
                device_type = "memory";
-               reg = <0x60000000 0x80000000>;
+               reg = <0x60000000 0x20000000>;
        };
 
        gpio-restart {
index 08568ce..da8bb9d 100644 (file)
 
                sata: sata@46000000 {
                        /* The ROM uses this muxmode */
-                       cortina,gemini-ata-muxmode = <3>;
+                       cortina,gemini-ata-muxmode = <0>;
                        cortina,gemini-enable-sata-bridge;
                        status = "okay";
                };
index cf42c2f..1281bc3 100644 (file)
@@ -42,7 +42,7 @@
 
 /dts-v1/;
 
-#include "imx6q.dtsi"
+#include "imx6dl.dtsi"
 #include "imx6qdl-icore-rqs.dtsi"
 
 / {
index c1aa7a4..a30ee9f 100644 (file)
@@ -71,6 +71,8 @@
 };
 
 &i2c1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&i2c1_pins>;
        clock-frequency = <2600000>;
 
        twl: twl@48 {
                >;
        };
 
-
+       i2c1_pins: pinmux_i2c1_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0)        /* i2c1_scl.i2c1_scl */
+                       OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0)        /* i2c1_sda.i2c1_sda */
+               >;
+       };
 };
 
 &omap3_pmx_wkup {
index b50b796..4791544 100644 (file)
@@ -66,6 +66,8 @@
 };
 
 &i2c1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&i2c1_pins>;
        clock-frequency = <2600000>;
 
        twl: twl@48 {
                        OMAP3_CORE1_IOPAD(0x21b8, PIN_INPUT | MUX_MODE0)        /* hsusb0_data7.hsusb0_data7 */
                >;
        };
+       i2c1_pins: pinmux_i2c1_pins {
+               pinctrl-single,pins = <
+                       OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0)        /* i2c1_scl.i2c1_scl */
+                       OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0)        /* i2c1_sda.i2c1_sda */
+               >;
+       };
 };
 
 &uart2 {
index ec2c8ba..592e17f 100644 (file)
@@ -47,7 +47,7 @@
                        gpios = <&gpio3 19 GPIO_ACTIVE_LOW>;    /* gpio3_83 */
                        wakeup-source;
                        autorepeat;
-                       debounce_interval = <50>;
+                       debounce-interval = <50>;
                };
        };
 
index 3b704cf..a974581 100644 (file)
                max-frequency = <37500000>;
                clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
                         <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
                resets = <&cru SRST_SDIO>;
                max-frequency = <37500000>;
                clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
                         <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                default-sample-phase = <158>;
                disable-wp;
                dmas = <&pdma 12>;
index 780ec3a..341deaf 100644 (file)
                interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,
                         <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                pinctrl-names = "default";
                pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>;
                interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
                         <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                pinctrl-names = "default";
                pinctrl-0 = <&sdio_clk &sdio_cmd &sdio_bus4>;
                max-frequency = <37500000>;
                clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
                         <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                bus-width = <8>;
                default-sample-phase = <158>;
                fifo-depth = <0x100>;
index 99cfae8..5eae477 100644 (file)
        };
 };
 
-&cpu0 {
-       cpu0-supply = <&vdd_cpu>;
-       operating-points = <
-               /* KHz    uV */
-               1800000 1400000
-               1608000 1350000
-               1512000 1300000
-               1416000 1200000
-               1200000 1100000
-               1008000 1050000
-                816000 1000000
-                696000  950000
-                600000  900000
-                408000  900000
-                312000  900000
-                216000  900000
-                126000  900000
-       >;
-};
-
 &emmc {
        status = "okay";
        bus-width = <8>;
index 8a74efd..240e7a2 100644 (file)
@@ -56,7 +56,7 @@
                        clocks = <&topclk ZX296702_A9_PERIPHCLK>;
                };
 
-               l2cc: l2-cache-controller@0x00c00000 {
+               l2cc: l2-cache-controller@c00000 {
                        compatible = "arm,pl310-cache";
                        reg = <0x00c00000 0x1000>;
                        cache-unified;
                        arm,double-linefill-incr = <0>;
                };
 
-               pcu: pcu@0xa0008000 {
+               pcu: pcu@a0008000 {
                        compatible = "zte,zx296702-pcu";
                        reg = <0xa0008000 0x1000>;
                };
 
-               topclk: topclk@0x09800000 {
+               topclk: topclk@9800000 {
                        compatible = "zte,zx296702-topcrm-clk";
                        reg = <0x09800000 0x1000>;
                        #clock-cells = <1>;
                };
 
-               lsp1clk: lsp1clk@0x09400000 {
+               lsp1clk: lsp1clk@9400000 {
                        compatible = "zte,zx296702-lsp1crpm-clk";
                        reg = <0x09400000 0x1000>;
                        #clock-cells = <1>;
                };
 
-               lsp0clk: lsp0clk@0x0b000000 {
+               lsp0clk: lsp0clk@b000000 {
                        compatible = "zte,zx296702-lsp0crpm-clk";
                        reg = <0x0b000000 0x1000>;
                        #clock-cells = <1>;
                };
 
-               uart0: serial@0x09405000 {
+               uart0: serial@9405000 {
                        compatible = "zte,zx296702-uart";
                        reg = <0x09405000 0x1000>;
                        interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
@@ -98,7 +98,7 @@
                        status = "disabled";
                };
 
-               uart1: serial@0x09406000 {
+               uart1: serial@9406000 {
                        compatible = "zte,zx296702-uart";
                        reg = <0x09406000 0x1000>;
                        interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
                        status = "disabled";
                };
 
-               mmc0: mmc@0x09408000 {
+               mmc0: mmc@9408000 {
                        compatible = "snps,dw-mshc";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
                };
 
-               mmc1: mmc@0x0b003000 {
+               mmc1: mmc@b003000 {
                        compatible = "snps,dw-mshc";
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
                };
 
-               sysctrl: sysctrl@0xa0007000 {
+               sysctrl: sysctrl@a0007000 {
                        compatible = "zte,sysctrl", "syscon";
                        reg = <0xa0007000 0x1000>;
                };
index 2f145c4..92674f2 100644 (file)
@@ -319,7 +319,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y
 CONFIG_RC_CORE=m
 CONFIG_MEDIA_CONTROLLER=y
 CONFIG_VIDEO_V4L2_SUBDEV_API=y
-CONFIG_LIRC=m
+CONFIG_LIRC=y
 CONFIG_RC_DEVICES=y
 CONFIG_IR_RX51=m
 CONFIG_V4L_PLATFORM_DRIVERS=y
index 5638ce0..63d6b40 100644 (file)
@@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
 
 KVM=../../../../virt/kvm
 
+CFLAGS_ARMV7VE            :=$(call cc-option, -march=armv7ve)
+
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
@@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
 obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
 obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
+CFLAGS_banked-sr.o        += $(CFLAGS_ARMV7VE)
+
 obj-$(CONFIG_KVM_ARM_HOST) += entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += switch.o
+CFLAGS_switch.o                   += $(CFLAGS_ARMV7VE)
 obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
index 111bda8..be4b8b0 100644 (file)
 
 #include <asm/kvm_hyp.h>
 
+/*
+ * gcc before 4.9 doesn't understand -march=armv7ve, so we have to
+ * trick the assembler.
+ */
 __asm__(".arch_extension     virt");
 
 void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
index ee1f83b..4c89a8e 100644 (file)
@@ -69,7 +69,7 @@ static void clps711x_restart(enum reboot_mode mode, const char *cmd)
        soft_restart(0);
 }
 
-static const char *clps711x_compat[] __initconst = {
+static const char *const clps711x_compat[] __initconst = {
        "cirrus,ep7209",
        NULL
 };
index e457f29..d6b1190 100644 (file)
@@ -368,7 +368,7 @@ static struct spi_eeprom at25640a = {
        .flags          = EE_ADDR2,
 };
 
-static struct spi_board_info dm355_evm_spi_info[] __initconst = {
+static const struct spi_board_info dm355_evm_spi_info[] __initconst = {
        {
                .modalias       = "at25",
                .platform_data  = &at25640a,
index be99724..fad9a56 100644 (file)
@@ -217,7 +217,7 @@ static struct spi_eeprom at25640a = {
        .flags          = EE_ADDR2,
 };
 
-static struct spi_board_info dm355_leopard_spi_info[] __initconst = {
+static const struct spi_board_info dm355_leopard_spi_info[] __initconst = {
        {
                .modalias       = "at25",
                .platform_data  = &at25640a,
index e75741f..e378098 100644 (file)
@@ -726,7 +726,7 @@ static struct spi_eeprom at25640 = {
        .flags          = EE_ADDR2,
 };
 
-static struct spi_board_info dm365_evm_spi_info[] __initconst = {
+static const struct spi_board_info dm365_evm_spi_info[] __initconst = {
        {
                .modalias       = "at25",
                .platform_data  = &at25640,
index 6b32dc5..2c20599 100644 (file)
@@ -41,7 +41,7 @@ config MACH_ARMADA_375
        depends on ARCH_MULTI_V7
        select ARMADA_370_XP_IRQ
        select ARM_ERRATA_720789
-       select ARM_ERRATA_753970
+       select PL310_ERRATA_753970
        select ARM_GIC
        select ARMADA_375_CLK
        select HAVE_ARM_SCU
@@ -57,7 +57,7 @@ config MACH_ARMADA_38X
        bool "Marvell Armada 380/385 boards"
        depends on ARCH_MULTI_V7
        select ARM_ERRATA_720789
-       select ARM_ERRATA_753970
+       select PL310_ERRATA_753970
        select ARM_GIC
        select ARM_GLOBAL_TIMER
        select CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
index 43e3e18..fa51241 100644 (file)
@@ -1011,17 +1011,17 @@ static int clk_debugfs_register_one(struct clk *c)
                return -ENOMEM;
        c->dent = d;
 
-       d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount);
+       d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount);
        if (!d) {
                err = -ENOMEM;
                goto err_out;
        }
-       d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate);
+       d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate);
        if (!d) {
                err = -ENOMEM;
                goto err_out;
        }
-       d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags);
+       d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags);
        if (!d) {
                err = -ENOMEM;
                goto err_out;
index 4bb6751..fc5fb77 100644 (file)
@@ -299,8 +299,6 @@ static void irq_save_context(void)
        if (soc_is_dra7xx())
                return;
 
-       if (!sar_base)
-               sar_base = omap4_get_sar_ram_base();
        if (wakeupgen_ops && wakeupgen_ops->save_context)
                wakeupgen_ops->save_context();
 }
@@ -598,6 +596,8 @@ static int __init wakeupgen_init(struct device_node *node,
        irq_hotplug_init();
        irq_pm_init();
 
+       sar_base = omap4_get_sar_ram_base();
+
        return 0;
 }
 IRQCHIP_DECLARE(ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init);
index 124f9af..34156ec 100644 (file)
@@ -977,6 +977,9 @@ static int _enable_clocks(struct omap_hwmod *oh)
 
        pr_debug("omap_hwmod: %s: enabling clocks\n", oh->name);
 
+       if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
+               _enable_optional_clocks(oh);
+
        if (oh->_clk)
                clk_enable(oh->_clk);
 
@@ -985,9 +988,6 @@ static int _enable_clocks(struct omap_hwmod *oh)
                        clk_enable(os->_clk);
        }
 
-       if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
-               _enable_optional_clocks(oh);
-
        /* The opt clocks are controlled by the device driver. */
 
        return 0;
index 366158a..6f68576 100644 (file)
@@ -186,7 +186,7 @@ static void omap_pm_end(void)
        cpu_idle_poll_ctrl(false);
 }
 
-static void omap_pm_finish(void)
+static void omap_pm_wake(void)
 {
        if (soc_is_omap34xx())
                omap_prcm_irq_complete();
@@ -196,7 +196,7 @@ static const struct platform_suspend_ops omap_pm_ops = {
        .begin          = omap_pm_begin,
        .end            = omap_pm_end,
        .enter          = omap_pm_enter,
-       .finish         = omap_pm_finish,
+       .wake           = omap_pm_wake,
        .valid          = suspend_valid_only_mem,
 };
 
index ece09c9..d61fbd7 100644 (file)
@@ -156,12 +156,6 @@ static struct clock_event_device clockevent_gpt = {
        .tick_resume            = omap2_gp_timer_shutdown,
 };
 
-static struct property device_disabled = {
-       .name = "status",
-       .length = sizeof("disabled"),
-       .value = "disabled",
-};
-
 static const struct of_device_id omap_timer_match[] __initconst = {
        { .compatible = "ti,omap2420-timer", },
        { .compatible = "ti,omap3430-timer", },
@@ -203,8 +197,17 @@ static struct device_node * __init omap_get_timer_dt(const struct of_device_id *
                                  of_get_property(np, "ti,timer-secure", NULL)))
                        continue;
 
-               if (!of_device_is_compatible(np, "ti,omap-counter32k"))
-                       of_add_property(np, &device_disabled);
+               if (!of_device_is_compatible(np, "ti,omap-counter32k")) {
+                       struct property *prop;
+
+                       prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+                       if (!prop)
+                               return NULL;
+                       prop->name = "status";
+                       prop->value = "disabled";
+                       prop->length = strlen(prop->value);
+                       of_add_property(np, prop);
+               }
                return np;
        }
 
index aff6994..a2399fd 100644 (file)
@@ -472,28 +472,27 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
 /*****************************************************************************
  * Ethernet switch
  ****************************************************************************/
-static __initconst const char *orion_ge00_mvmdio_bus_name = "orion-mii";
-static __initdata struct mdio_board_info
-                 orion_ge00_switch_board_info;
+static __initdata struct mdio_board_info orion_ge00_switch_board_info = {
+       .bus_id   = "orion-mii",
+       .modalias = "mv88e6085",
+};
 
 void __init orion_ge00_switch_init(struct dsa_chip_data *d)
 {
-       struct mdio_board_info *bd;
        unsigned int i;
 
        if (!IS_BUILTIN(CONFIG_PHYLIB))
                return;
 
-       for (i = 0; i < ARRAY_SIZE(d->port_names); i++)
-               if (!strcmp(d->port_names[i], "cpu"))
+       for (i = 0; i < ARRAY_SIZE(d->port_names); i++) {
+               if (!strcmp(d->port_names[i], "cpu")) {
+                       d->netdev[i] = &orion_ge00.dev;
                        break;
+               }
+       }
 
-       bd = &orion_ge00_switch_board_info;
-       bd->bus_id = orion_ge00_mvmdio_bus_name;
-       bd->mdio_addr = d->sw_addr;
-       d->netdev[i] = &orion_ge00.dev;
-       strcpy(bd->modalias, "mv88e6085");
-       bd->platform_data = d;
+       orion_ge00_switch_board_info.mdio_addr = d->sw_addr;
+       orion_ge00_switch_board_info.platform_data = d;
 
        mdiobus_register_board_info(&orion_ge00_switch_board_info, 1);
 }
index a806326..70c776e 100644 (file)
 
                        uart_A: serial@24000 {
                                compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart";
-                               reg = <0x0 0x24000 0x0 0x14>;
+                               reg = <0x0 0x24000 0x0 0x18>;
                                interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
 
                        uart_B: serial@23000 {
                                compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart";
-                               reg = <0x0 0x23000 0x0 0x14>;
+                               reg = <0x0 0x23000 0x0 0x18>;
                                interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
index 6cb3c2a..4ee2e79 100644 (file)
 
                        uart_A: serial@84c0 {
                                compatible = "amlogic,meson-gx-uart";
-                               reg = <0x0 0x84c0 0x0 0x14>;
+                               reg = <0x0 0x84c0 0x0 0x18>;
                                interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
 
                        uart_B: serial@84dc {
                                compatible = "amlogic,meson-gx-uart";
-                               reg = <0x0 0x84dc 0x0 0x14>;
+                               reg = <0x0 0x84dc 0x0 0x18>;
                                interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
 
                        uart_C: serial@8700 {
                                compatible = "amlogic,meson-gx-uart";
-                               reg = <0x0 0x8700 0x0 0x14>;
+                               reg = <0x0 0x8700 0x0 0x18>;
                                interrupts = <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
 
                        uart_AO: serial@4c0 {
                                compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart";
-                               reg = <0x0 0x004c0 0x0 0x14>;
+                               reg = <0x0 0x004c0 0x0 0x18>;
                                interrupts = <GIC_SPI 193 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
 
                        uart_AO_B: serial@4e0 {
                                compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart";
-                               reg = <0x0 0x004e0 0x0 0x14>;
+                               reg = <0x0 0x004e0 0x0 0x18>;
                                interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>;
                                status = "disabled";
                        };
index 4f355f1..c851411 100644 (file)
 
                        internal_phy: ethernet-phy@8 {
                                compatible = "ethernet-phy-id0181.4400", "ethernet-phy-ieee802.3-c22";
+                               interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
                                reg = <8>;
                                max-speed = <100>;
                        };
index 4220fbd..ff5c4c4 100644 (file)
@@ -98,7 +98,7 @@
                clock-output-names = "clk125mhz";
        };
 
-       pci {
+       pcie@30000000 {
                compatible = "pci-host-ecam-generic";
                device_type = "pci";
                #interrupt-cells = <1>;
                ranges =
                  <0x02000000    0 0x40000000    0 0x40000000    0 0x20000000
                   0x43000000 0x40 0x00000000 0x40 0x00000000 0x20 0x00000000>;
+               bus-range = <0 0xff>;
                interrupt-map-mask = <0 0 0 7>;
                interrupt-map =
                      /* addr  pin  ic   icaddr  icintr */
index e94fa1a..047641f 100644 (file)
@@ -51,7 +51,7 @@
                #size-cells = <2>;
                ranges;
 
-               ramoops@0x21f00000 {
+               ramoops@21f00000 {
                        compatible = "ramoops";
                        reg = <0x0 0x21f00000 0x0 0x00100000>;
                        record-size     = <0x00020000>;
index 9fbe470..94597e3 100644 (file)
                        reg = <0 0x10005000 0 0x1000>;
                };
 
-               pio: pinctrl@0x10005000 {
+               pio: pinctrl@10005000 {
                        compatible = "mediatek,mt8173-pinctrl";
                        reg = <0 0x1000b000 0 0x1000>;
                        mediatek,pctl-regmap = <&syscfg_pctl_a>;
index 492a011..1c8f1b8 100644 (file)
                };
 
                agnoc@0 {
-                       qcom,pcie@00600000 {
+                       qcom,pcie@600000 {
                                perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>;
                        };
 
-                       qcom,pcie@00608000 {
+                       qcom,pcie@608000 {
                                status = "okay";
                                perst-gpio = <&msmgpio 130 GPIO_ACTIVE_LOW>;
                        };
 
-                       qcom,pcie@00610000 {
+                       qcom,pcie@610000 {
                                status = "okay";
                                perst-gpio = <&msmgpio 114 GPIO_ACTIVE_LOW>;
                        };
index 4b2afcc..0a6f795 100644 (file)
                        #size-cells = <1>;
                        ranges;
 
-                       pcie0: qcom,pcie@00600000 {
+                       pcie0: qcom,pcie@600000 {
                                compatible = "qcom,pcie-msm8996", "snps,dw-pcie";
                                status = "disabled";
                                power-domains = <&gcc PCIE0_GDSC>;
 
                        };
 
-                       pcie1: qcom,pcie@00608000 {
+                       pcie1: qcom,pcie@608000 {
                                compatible = "qcom,pcie-msm8996", "snps,dw-pcie";
                                power-domains = <&gcc PCIE1_GDSC>;
                                bus-range = <0x00 0xff>;
                                                "bus_slave";
                        };
 
-                       pcie2: qcom,pcie@00610000 {
+                       pcie2: qcom,pcie@610000 {
                                compatible = "qcom,pcie-msm8996", "snps,dw-pcie";
                                power-domains = <&gcc PCIE2_GDSC>;
                                bus-range = <0x00 0xff>;
index 3890468..2825772 100644 (file)
        assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
        assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
        clock_in_out = "input";
-       /* shows instability at 1GBit right now */
-       max-speed = <100>;
        phy-supply = <&vcc_io>;
        phy-mode = "rgmii";
        pinctrl-names = "default";
        pinctrl-0 = <&rgmiim1_pins>;
+       snps,force_thresh_dma_mode;
        snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>;
        snps,reset-active-low;
        snps,reset-delays-us = <0 10000 50000>;
-       tx_delay = <0x26>;
-       rx_delay = <0x11>;
+       tx_delay = <0x24>;
+       rx_delay = <0x18>;
        status = "okay";
 };
 
index a037ee5..cae3415 100644 (file)
                interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,
                         <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                status = "disabled";
        };
                interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>,
                         <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                status = "disabled";
        };
                interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
                         <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                status = "disabled";
        };
index aa4d070..03458ac 100644 (file)
                max-frequency = <150000000>;
                clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>,
                         <&cru SCLK_SDIO0_DRV>, <&cru SCLK_SDIO0_SAMPLE>;
-               clock-names = "biu", "ciu", "ciu_drv", "ciu_sample";
+               clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
                fifo-depth = <0x100>;
                interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
                resets = <&cru SRST_SDIO0>;
index 0f873c8..ce592a4 100644 (file)
        assigned-clocks = <&cru SCLK_PCIEPHY_REF>;
        assigned-clock-parents = <&cru SCLK_PCIEPHY_REF100M>;
        assigned-clock-rates = <100000000>;
-       ep-gpios = <&gpio3 RK_PB5 GPIO_ACTIVE_HIGH>;
+       ep-gpios = <&gpio2 RK_PA4 GPIO_ACTIVE_HIGH>;
        num-lanes = <4>;
        pinctrl-names = "default";
        pinctrl-0 = <&pcie_clkreqn_cpm>;
index 7aa2144..2605118 100644 (file)
                compatible = "rockchip,rk3399-edp";
                reg = <0x0 0xff970000 0x0 0x8000>;
                interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH 0>;
-               clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>;
-               clock-names = "dp", "pclk";
+               clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>, <&cru PCLK_VIO_GRF>;
+               clock-names = "dp", "pclk", "grf";
                pinctrl-names = "default";
                pinctrl-0 = <&edp_hpd>;
                power-domains = <&power RK3399_PD_EDP>;
index c0319cb..5510366 100644 (file)
@@ -34,9 +34,9 @@
 #define wmb()          RISCV_FENCE(ow,ow)
 
 /* These barriers do not need to enforce ordering on devices, just memory. */
-#define smp_mb()       RISCV_FENCE(rw,rw)
-#define smp_rmb()      RISCV_FENCE(r,r)
-#define smp_wmb()      RISCV_FENCE(w,w)
+#define __smp_mb()     RISCV_FENCE(rw,rw)
+#define __smp_rmb()    RISCV_FENCE(r,r)
+#define __smp_wmb()    RISCV_FENCE(w,w)
 
 /*
  * This is a very specific barrier: it's currently only used in two places in
index 9c7d707..07c6e81 100644 (file)
 #include "trace.h"
 #include "trace-s390.h"
 
-
-static const intercept_handler_t instruction_handlers[256] = {
-       [0x01] = kvm_s390_handle_01,
-       [0x82] = kvm_s390_handle_lpsw,
-       [0x83] = kvm_s390_handle_diag,
-       [0xaa] = kvm_s390_handle_aa,
-       [0xae] = kvm_s390_handle_sigp,
-       [0xb2] = kvm_s390_handle_b2,
-       [0xb6] = kvm_s390_handle_stctl,
-       [0xb7] = kvm_s390_handle_lctl,
-       [0xb9] = kvm_s390_handle_b9,
-       [0xe3] = kvm_s390_handle_e3,
-       [0xe5] = kvm_s390_handle_e5,
-       [0xeb] = kvm_s390_handle_eb,
-};
-
 u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
@@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu)
 
 static int handle_instruction(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
        vcpu->stat.exit_instruction++;
        trace_kvm_s390_intercept_instruction(vcpu,
                                             vcpu->arch.sie_block->ipa,
                                             vcpu->arch.sie_block->ipb);
-       handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
-       if (handler)
-               return handler(vcpu);
-       return -EOPNOTSUPP;
+
+       switch (vcpu->arch.sie_block->ipa >> 8) {
+       case 0x01:
+               return kvm_s390_handle_01(vcpu);
+       case 0x82:
+               return kvm_s390_handle_lpsw(vcpu);
+       case 0x83:
+               return kvm_s390_handle_diag(vcpu);
+       case 0xaa:
+               return kvm_s390_handle_aa(vcpu);
+       case 0xae:
+               return kvm_s390_handle_sigp(vcpu);
+       case 0xb2:
+               return kvm_s390_handle_b2(vcpu);
+       case 0xb6:
+               return kvm_s390_handle_stctl(vcpu);
+       case 0xb7:
+               return kvm_s390_handle_lctl(vcpu);
+       case 0xb9:
+               return kvm_s390_handle_b9(vcpu);
+       case 0xe3:
+               return kvm_s390_handle_e3(vcpu);
+       case 0xe5:
+               return kvm_s390_handle_e5(vcpu);
+       case 0xeb:
+               return kvm_s390_handle_eb(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
index aabf46f..b04616b 100644 (file)
@@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 
 static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 {
-       if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm))
+       const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+       const u64 ckc = vcpu->arch.sie_block->ckc;
+
+       if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
+               if ((s64)ckc >= (s64)now)
+                       return 0;
+       } else if (ckc >= now) {
                return 0;
+       }
        return ckc_interrupts_enabled(vcpu);
 }
 
@@ -187,12 +194,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
        return kvm_s390_get_cpu_timer(vcpu) >> 63;
 }
 
-static inline int is_ioirq(unsigned long irq_type)
-{
-       return ((irq_type >= IRQ_PEND_IO_ISC_7) &&
-               (irq_type <= IRQ_PEND_IO_ISC_0));
-}
-
 static uint64_t isc_to_isc_bits(int isc)
 {
        return (0x80 >> isc) << 24;
@@ -236,10 +237,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis
        return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
 }
 
-static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
+static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
 {
        return vcpu->kvm->arch.float_int.pending_irqs |
-               vcpu->arch.local_int.pending_irqs |
+               vcpu->arch.local_int.pending_irqs;
+}
+
+static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
+{
+       return pending_irqs_no_gisa(vcpu) |
                kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
 }
 
@@ -337,7 +343,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 
 static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
 {
-       if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
+       if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))
                return;
        else if (psw_ioint_disabled(vcpu))
                kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
@@ -1011,24 +1017,6 @@ out:
        return rc;
 }
 
-typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
-
-static const deliver_irq_t deliver_irq_funcs[] = {
-       [IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
-       [IRQ_PEND_MCHK_REP]       = __deliver_machine_check,
-       [IRQ_PEND_PROG]           = __deliver_prog,
-       [IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
-       [IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
-       [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
-       [IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer,
-       [IRQ_PEND_RESTART]        = __deliver_restart,
-       [IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
-       [IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
-       [IRQ_PEND_EXT_SERVICE]    = __deliver_service,
-       [IRQ_PEND_PFAULT_DONE]    = __deliver_pfault_done,
-       [IRQ_PEND_VIRTIO]         = __deliver_virtio,
-};
-
 /* Check whether an external call is pending (deliverable or not) */
 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -1066,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
 {
-       u64 now, cputm, sltime = 0;
+       const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+       const u64 ckc = vcpu->arch.sie_block->ckc;
+       u64 cputm, sltime = 0;
 
        if (ckc_interrupts_enabled(vcpu)) {
-               now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
-               sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
-               /* already expired or overflow? */
-               if (!sltime || vcpu->arch.sie_block->ckc <= now)
+               if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) {
+                       if ((s64)now < (s64)ckc)
+                               sltime = tod_to_ns((s64)ckc - (s64)now);
+               } else if (now < ckc) {
+                       sltime = tod_to_ns(ckc - now);
+               }
+               /* already expired */
+               if (!sltime)
                        return 0;
                if (cpu_timer_interrupts_enabled(vcpu)) {
                        cputm = kvm_s390_get_cpu_timer(vcpu);
@@ -1192,7 +1186,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       deliver_irq_t func;
        int rc = 0;
        unsigned long irq_type;
        unsigned long irqs;
@@ -1212,16 +1205,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
        while ((irqs = deliverable_irqs(vcpu)) && !rc) {
                /* bits are in the reverse order of interrupt priority */
                irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
-               if (is_ioirq(irq_type)) {
+               switch (irq_type) {
+               case IRQ_PEND_IO_ISC_0:
+               case IRQ_PEND_IO_ISC_1:
+               case IRQ_PEND_IO_ISC_2:
+               case IRQ_PEND_IO_ISC_3:
+               case IRQ_PEND_IO_ISC_4:
+               case IRQ_PEND_IO_ISC_5:
+               case IRQ_PEND_IO_ISC_6:
+               case IRQ_PEND_IO_ISC_7:
                        rc = __deliver_io(vcpu, irq_type);
-               } else {
-                       func = deliver_irq_funcs[irq_type];
-                       if (!func) {
-                               WARN_ON_ONCE(func == NULL);
-                               clear_bit(irq_type, &li->pending_irqs);
-                               continue;
-                       }
-                       rc = func(vcpu);
+                       break;
+               case IRQ_PEND_MCHK_EX:
+               case IRQ_PEND_MCHK_REP:
+                       rc = __deliver_machine_check(vcpu);
+                       break;
+               case IRQ_PEND_PROG:
+                       rc = __deliver_prog(vcpu);
+                       break;
+               case IRQ_PEND_EXT_EMERGENCY:
+                       rc = __deliver_emergency_signal(vcpu);
+                       break;
+               case IRQ_PEND_EXT_EXTERNAL:
+                       rc = __deliver_external_call(vcpu);
+                       break;
+               case IRQ_PEND_EXT_CLOCK_COMP:
+                       rc = __deliver_ckc(vcpu);
+                       break;
+               case IRQ_PEND_EXT_CPU_TIMER:
+                       rc = __deliver_cpu_timer(vcpu);
+                       break;
+               case IRQ_PEND_RESTART:
+                       rc = __deliver_restart(vcpu);
+                       break;
+               case IRQ_PEND_SET_PREFIX:
+                       rc = __deliver_set_prefix(vcpu);
+                       break;
+               case IRQ_PEND_PFAULT_INIT:
+                       rc = __deliver_pfault_init(vcpu);
+                       break;
+               case IRQ_PEND_EXT_SERVICE:
+                       rc = __deliver_service(vcpu);
+                       break;
+               case IRQ_PEND_PFAULT_DONE:
+                       rc = __deliver_pfault_done(vcpu);
+                       break;
+               case IRQ_PEND_VIRTIO:
+                       rc = __deliver_virtio(vcpu);
+                       break;
+               default:
+                       WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
+                       clear_bit(irq_type, &li->pending_irqs);
                }
        }
 
@@ -1701,7 +1735,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
                kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
                break;
        case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
+               if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa))
+                       kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
                break;
        default:
                kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
index ba4c709..77d7818 100644 (file)
@@ -179,6 +179,28 @@ int kvm_arch_hardware_enable(void)
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
 
+static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
+{
+       u8 delta_idx = 0;
+
+       /*
+        * The TOD jumps by delta, we have to compensate this by adding
+        * -delta to the epoch.
+        */
+       delta = -delta;
+
+       /* sign-extension - we're adding to signed values below */
+       if ((s64)delta < 0)
+               delta_idx = -1;
+
+       scb->epoch += delta;
+       if (scb->ecd & ECD_MEF) {
+               scb->epdx += delta_idx;
+               if (scb->epoch < delta)
+                       scb->epdx += 1;
+       }
+}
+
 /*
  * This callback is executed during stop_machine(). All CPUs are therefore
  * temporarily stopped. In order not to change guest behavior, we have to
@@ -194,13 +216,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
        unsigned long long *delta = v;
 
        list_for_each_entry(kvm, &vm_list, vm_list) {
-               kvm->arch.epoch -= *delta;
                kvm_for_each_vcpu(i, vcpu, kvm) {
-                       vcpu->arch.sie_block->epoch -= *delta;
+                       kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
+                       if (i == 0) {
+                               kvm->arch.epoch = vcpu->arch.sie_block->epoch;
+                               kvm->arch.epdx = vcpu->arch.sie_block->epdx;
+                       }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
-                               vcpu->arch.vsie_block->epoch -= *delta;
+                               kvm_clock_sync_scb(vcpu->arch.vsie_block,
+                                                  *delta);
                }
        }
        return NOTIFY_OK;
@@ -902,12 +928,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;
 
-       if (test_kvm_facility(kvm, 139))
-               kvm_s390_set_tod_clock_ext(kvm, &gtod);
-       else if (gtod.epoch_idx == 0)
-               kvm_s390_set_tod_clock(kvm, gtod.tod);
-       else
+       if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
+       kvm_s390_set_tod_clock(kvm, &gtod);
 
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
@@ -932,13 +955,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 
 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 {
-       u64 gtod;
+       struct kvm_s390_vm_tod_clock gtod = { 0 };
 
-       if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+       if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
+                          sizeof(gtod.tod)))
                return -EFAULT;
 
-       kvm_s390_set_tod_clock(kvm, gtod);
-       VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
+       kvm_s390_set_tod_clock(kvm, &gtod);
+       VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
 }
 
@@ -2389,6 +2413,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
        mutex_lock(&vcpu->kvm->lock);
        preempt_disable();
        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+       vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
        preempt_enable();
        mutex_unlock(&vcpu->kvm->lock);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
@@ -3021,8 +3046,8 @@ retry:
        return 0;
 }
 
-void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
-                                const struct kvm_s390_vm_tod_clock *gtod)
+void kvm_s390_set_tod_clock(struct kvm *kvm,
+                           const struct kvm_s390_vm_tod_clock *gtod)
 {
        struct kvm_vcpu *vcpu;
        struct kvm_s390_tod_clock_ext htod;
@@ -3034,10 +3059,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
        get_tod_clock_ext((char *)&htod);
 
        kvm->arch.epoch = gtod->tod - htod.tod;
-       kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
-
-       if (kvm->arch.epoch > gtod->tod)
-               kvm->arch.epdx -= 1;
+       kvm->arch.epdx = 0;
+       if (test_kvm_facility(kvm, 139)) {
+               kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
+               if (kvm->arch.epoch > gtod->tod)
+                       kvm->arch.epdx -= 1;
+       }
 
        kvm_s390_vcpu_block_all(kvm);
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3050,22 +3077,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
        mutex_unlock(&kvm->lock);
 }
 
-void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
-{
-       struct kvm_vcpu *vcpu;
-       int i;
-
-       mutex_lock(&kvm->lock);
-       preempt_disable();
-       kvm->arch.epoch = tod - get_tod_clock();
-       kvm_s390_vcpu_block_all(kvm);
-       kvm_for_each_vcpu(i, vcpu, kvm)
-               vcpu->arch.sie_block->epoch = kvm->arch.epoch;
-       kvm_s390_vcpu_unblock_all(kvm);
-       preempt_enable();
-       mutex_unlock(&kvm->lock);
-}
-
 /**
  * kvm_arch_fault_in_page - fault-in guest page if necessary
  * @vcpu: The corresponding virtual cpu
index bd31b37..f55ac0e 100644 (file)
@@ -19,8 +19,6 @@
 #include <asm/processor.h>
 #include <asm/sclp.h>
 
-typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
-
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)    ((vcpu->arch.sie_block->ecb & ECB_TE))
 #define TDB_FORMAT1            1
@@ -283,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
-                                const struct kvm_s390_vm_tod_clock *gtod);
-void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
+void kvm_s390_set_tod_clock(struct kvm *kvm,
+                           const struct kvm_s390_vm_tod_clock *gtod);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
index c4c4e15..f0b4185 100644 (file)
@@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
 /* Handle SCK (SET CLOCK) interception */
 static int handle_set_clock(struct kvm_vcpu *vcpu)
 {
+       struct kvm_s390_vm_tod_clock gtod = { 0 };
        int rc;
        u8 ar;
-       u64 op2, val;
+       u64 op2;
 
        vcpu->stat.instruction_sck++;
 
@@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
        op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (op2 & 7)    /* Operand must be on a doubleword boundary */
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
+       rc = read_guest(vcpu, op2, ar, &gtod.tod, sizeof(gtod.tod));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
-       VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
-       kvm_s390_set_tod_clock(vcpu->kvm, val);
+       VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
+       kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
 
        kvm_s390_set_psw_cc(vcpu, 0);
        return 0;
@@ -795,55 +796,60 @@ out:
        return rc;
 }
 
-static const intercept_handler_t b2_handlers[256] = {
-       [0x02] = handle_stidp,
-       [0x04] = handle_set_clock,
-       [0x10] = handle_set_prefix,
-       [0x11] = handle_store_prefix,
-       [0x12] = handle_store_cpu_address,
-       [0x14] = kvm_s390_handle_vsie,
-       [0x21] = handle_ipte_interlock,
-       [0x29] = handle_iske,
-       [0x2a] = handle_rrbe,
-       [0x2b] = handle_sske,
-       [0x2c] = handle_test_block,
-       [0x30] = handle_io_inst,
-       [0x31] = handle_io_inst,
-       [0x32] = handle_io_inst,
-       [0x33] = handle_io_inst,
-       [0x34] = handle_io_inst,
-       [0x35] = handle_io_inst,
-       [0x36] = handle_io_inst,
-       [0x37] = handle_io_inst,
-       [0x38] = handle_io_inst,
-       [0x39] = handle_io_inst,
-       [0x3a] = handle_io_inst,
-       [0x3b] = handle_io_inst,
-       [0x3c] = handle_io_inst,
-       [0x50] = handle_ipte_interlock,
-       [0x56] = handle_sthyi,
-       [0x5f] = handle_io_inst,
-       [0x74] = handle_io_inst,
-       [0x76] = handle_io_inst,
-       [0x7d] = handle_stsi,
-       [0xb1] = handle_stfl,
-       [0xb2] = handle_lpswe,
-};
-
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
-       /*
-        * A lot of B2 instructions are priviledged. Here we check for
-        * the privileged ones, that we can handle in the kernel.
-        * Anything else goes to userspace.
-        */
-       handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-       if (handler)
-               return handler(vcpu);
-
-       return -EOPNOTSUPP;
+       switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+       case 0x02:
+               return handle_stidp(vcpu);
+       case 0x04:
+               return handle_set_clock(vcpu);
+       case 0x10:
+               return handle_set_prefix(vcpu);
+       case 0x11:
+               return handle_store_prefix(vcpu);
+       case 0x12:
+               return handle_store_cpu_address(vcpu);
+       case 0x14:
+               return kvm_s390_handle_vsie(vcpu);
+       case 0x21:
+       case 0x50:
+               return handle_ipte_interlock(vcpu);
+       case 0x29:
+               return handle_iske(vcpu);
+       case 0x2a:
+               return handle_rrbe(vcpu);
+       case 0x2b:
+               return handle_sske(vcpu);
+       case 0x2c:
+               return handle_test_block(vcpu);
+       case 0x30:
+       case 0x31:
+       case 0x32:
+       case 0x33:
+       case 0x34:
+       case 0x35:
+       case 0x36:
+       case 0x37:
+       case 0x38:
+       case 0x39:
+       case 0x3a:
+       case 0x3b:
+       case 0x3c:
+       case 0x5f:
+       case 0x74:
+       case 0x76:
+               return handle_io_inst(vcpu);
+       case 0x56:
+               return handle_sthyi(vcpu);
+       case 0x7d:
+               return handle_stsi(vcpu);
+       case 0xb1:
+               return handle_stfl(vcpu);
+       case 0xb2:
+               return handle_lpswe(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int handle_epsw(struct kvm_vcpu *vcpu)
@@ -1105,25 +1111,22 @@ static int handle_essa(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static const intercept_handler_t b9_handlers[256] = {
-       [0x8a] = handle_ipte_interlock,
-       [0x8d] = handle_epsw,
-       [0x8e] = handle_ipte_interlock,
-       [0x8f] = handle_ipte_interlock,
-       [0xab] = handle_essa,
-       [0xaf] = handle_pfmf,
-};
-
 int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
-       /* This is handled just as for the B2 instructions. */
-       handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-       if (handler)
-               return handler(vcpu);
-
-       return -EOPNOTSUPP;
+       switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+       case 0x8a:
+       case 0x8e:
+       case 0x8f:
+               return handle_ipte_interlock(vcpu);
+       case 0x8d:
+               return handle_epsw(vcpu);
+       case 0xab:
+               return handle_essa(vcpu);
+       case 0xaf:
+               return handle_pfmf(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
@@ -1271,22 +1274,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
        return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
-static const intercept_handler_t eb_handlers[256] = {
-       [0x2f] = handle_lctlg,
-       [0x25] = handle_stctg,
-       [0x60] = handle_ri,
-       [0x61] = handle_ri,
-       [0x62] = handle_ri,
-};
-
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
-       handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
-       if (handler)
-               return handler(vcpu);
-       return -EOPNOTSUPP;
+       switch (vcpu->arch.sie_block->ipb & 0x000000ff) {
+       case 0x25:
+               return handle_stctg(vcpu);
+       case 0x2f:
+               return handle_lctlg(vcpu);
+       case 0x60:
+       case 0x61:
+       case 0x62:
+               return handle_ri(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int handle_tprot(struct kvm_vcpu *vcpu)
@@ -1346,10 +1347,12 @@ out_unlock:
 
 int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
 {
-       /* For e5xx... instructions we only handle TPROT */
-       if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01)
+       switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+       case 0x01:
                return handle_tprot(vcpu);
-       return -EOPNOTSUPP;
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int handle_sckpf(struct kvm_vcpu *vcpu)
@@ -1380,17 +1383,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static const intercept_handler_t x01_handlers[256] = {
-       [0x04] = handle_ptff,
-       [0x07] = handle_sckpf,
-};
-
 int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
 {
-       intercept_handler_t handler;
-
-       handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
-       if (handler)
-               return handler(vcpu);
-       return -EOPNOTSUPP;
+       switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+       case 0x04:
+               return handle_ptff(vcpu);
+       case 0x07:
+               return handle_sckpf(vcpu);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
index ec77270..8961e39 100644 (file)
@@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 {
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+       int guest_bp_isolation;
        int rc;
 
        handle_last_fault(vcpu, vsie_page);
@@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
                s390_handle_mcck();
 
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+
+       /* save current guest state of bp isolation override */
+       guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
+
+       /*
+        * The guest is running with BPBC, so we have to force it on for our
+        * nested guest. This is done by enabling BPBC globally, so the BPBC
+        * control in the SCB (which the nested guest can modify) is simply
+        * ignored.
+        */
+       if (test_kvm_facility(vcpu->kvm, 82) &&
+           vcpu->arch.sie_block->fpf & FPF_BPBC)
+               set_thread_flag(TIF_ISOLATE_BP_GUEST);
+
        local_irq_disable();
        guest_enter_irqoff();
        local_irq_enable();
@@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
        local_irq_disable();
        guest_exit_irqoff();
        local_irq_enable();
+
+       /* restore guest state for bp isolation override */
+       if (!guest_bp_isolation)
+               clear_thread_flag(TIF_ISOLATE_BP_GUEST);
+
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
        if (rc == -EINTR) {
index c1236b1..eb7f43f 100644 (file)
@@ -430,6 +430,7 @@ config GOLDFISH
 config RETPOLINE
        bool "Avoid speculative indirect branches in kernel"
        default y
+       select STACK_VALIDATION if HAVE_STACK_VALIDATION
        help
          Compile kernel with the retpoline compiler options to guard against
          kernel-to-user data leaks by avoiding speculative indirect
index fad5516..498c1b8 100644 (file)
@@ -232,10 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
 ifdef CONFIG_RETPOLINE
-    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
-    ifneq ($(RETPOLINE_CFLAGS),)
-        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
-    endif
+ifneq ($(RETPOLINE_CFLAGS),)
+  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+endif
 endif
 
 archscripts: scripts_basic
index dce7092..be63330 100644 (file)
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS  21*8
 
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
        /*
         * Push registers and sanitize registers of values that a
         * speculation attack might otherwise want to exploit. The
@@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with
         * could be put to use in a speculative execution gadget.
         * Interleave XOR with PUSH for better uop scheduling:
         */
+       .if \save_ret
+       pushq   %rsi            /* pt_regs->si */
+       movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
+       movq    %rdi, 8(%rsp)   /* pt_regs->di (overwriting original return address) */
+       .else
        pushq   %rdi            /* pt_regs->di */
        pushq   %rsi            /* pt_regs->si */
+       .endif
        pushq   \rdx            /* pt_regs->dx */
        pushq   %rcx            /* pt_regs->cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
-       xorq    %r8, %r8        /* nospec   r8 */
+       xorl    %r8d, %r8d      /* nospec   r8 */
        pushq   %r9             /* pt_regs->r9 */
-       xorq    %r9, %r9        /* nospec   r9 */
+       xorl    %r9d, %r9d      /* nospec   r9 */
        pushq   %r10            /* pt_regs->r10 */
-       xorq    %r10, %r10      /* nospec   r10 */
+       xorl    %r10d, %r10d    /* nospec   r10 */
        pushq   %r11            /* pt_regs->r11 */
-       xorq    %r11, %r11      /* nospec   r11*/
+       xorl    %r11d, %r11d    /* nospec   r11*/
        pushq   %rbx            /* pt_regs->rbx */
        xorl    %ebx, %ebx      /* nospec   rbx*/
        pushq   %rbp            /* pt_regs->rbp */
        xorl    %ebp, %ebp      /* nospec   rbp*/
        pushq   %r12            /* pt_regs->r12 */
-       xorq    %r12, %r12      /* nospec   r12*/
+       xorl    %r12d, %r12d    /* nospec   r12*/
        pushq   %r13            /* pt_regs->r13 */
-       xorq    %r13, %r13      /* nospec   r13*/
+       xorl    %r13d, %r13d    /* nospec   r13*/
        pushq   %r14            /* pt_regs->r14 */
-       xorq    %r14, %r14      /* nospec   r14*/
+       xorl    %r14d, %r14d    /* nospec   r14*/
        pushq   %r15            /* pt_regs->r15 */
-       xorq    %r15, %r15      /* nospec   r15*/
+       xorl    %r15d, %r15d    /* nospec   r15*/
        UNWIND_HINT_REGS
+       .if \save_ret
+       pushq   %rsi            /* return address on top of stack */
+       .endif
 .endm
 
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
@@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with
  */
 .macro ENCODE_FRAME_POINTER ptregs_offset=0
 #ifdef CONFIG_FRAME_POINTER
-       .if \ptregs_offset
-               leaq \ptregs_offset(%rsp), %rbp
-       .else
-               mov %rsp, %rbp
-       .endif
-       orq     $0x1, %rbp
+       leaq 1+\ptregs_offset(%rsp), %rbp
 #endif
 .endm
 
index 16c2c02..6ad064c 100644 (file)
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       /* Clobbers %ebx */
-       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
index 8971bd6..d5c7f18 100644 (file)
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       /* Clobbers %rbx */
-       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
@@ -449,9 +448,19 @@ END(irq_entries_start)
  *
  * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
  */
-.macro ENTER_IRQ_STACK regs=1 old_rsp
+.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
        DEBUG_ENTRY_ASSERT_IRQS_OFF
+
+       .if \save_ret
+       /*
+        * If save_ret is set, the original stack contains one additional
+        * entry -- the return address. Therefore, move the address one
+        * entry below %rsp to \old_rsp.
+        */
+       leaq    8(%rsp), \old_rsp
+       .else
        movq    %rsp, \old_rsp
+       .endif
 
        .if \regs
        UNWIND_HINT_REGS base=\old_rsp
@@ -497,6 +506,15 @@ END(irq_entries_start)
        .if \regs
        UNWIND_HINT_REGS indirect=1
        .endif
+
+       .if \save_ret
+       /*
+        * Push the return address to the stack. This return address can
+        * be found at the "real" original RSP, which was offset by 8 at
+        * the beginning of this macro.
+        */
+       pushq   -8(\old_rsp)
+       .endif
 .endm
 
 /*
@@ -520,27 +538,65 @@ END(irq_entries_start)
 .endm
 
 /*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers in fast path.
+ * Interrupt entry helper function.
  *
- * Entry runs with interrupts off.
+ * Entry runs with interrupts off. Stack layout at entry:
+ * +----------------------------------------------------+
+ * | regs->ss                                          |
+ * | regs->rsp                                         |
+ * | regs->eflags                                      |
+ * | regs->cs                                          |
+ * | regs->ip                                          |
+ * +----------------------------------------------------+
+ * | regs->orig_ax = ~(interrupt number)               |
+ * +----------------------------------------------------+
+ * | return address                                    |
+ * +----------------------------------------------------+
  */
-
-/* 0(%rsp): ~(interrupt number) */
-       .macro interrupt func
+ENTRY(interrupt_entry)
+       UNWIND_HINT_FUNC
+       ASM_CLAC
        cld
 
-       testb   $3, CS-ORIG_RAX(%rsp)
+       testb   $3, CS-ORIG_RAX+8(%rsp)
        jz      1f
        SWAPGS
-       call    switch_to_thread_stack
+
+       /*
+        * Switch to the thread stack. The IRET frame and orig_ax are
+        * on the stack, as well as the return address. RDI..R12 are
+        * not (yet) on the stack and space has not (yet) been
+        * allocated for them.
+        */
+       pushq   %rdi
+
+       /* Need to switch before accessing the thread stack. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+        /*
+         * We have RDI, return address, and orig_ax on the stack on
+         * top of the IRET frame. That means offset=24
+         */
+       UNWIND_HINT_IRET_REGS base=%rdi offset=24
+
+       pushq   7*8(%rdi)               /* regs->ss */
+       pushq   6*8(%rdi)               /* regs->rsp */
+       pushq   5*8(%rdi)               /* regs->eflags */
+       pushq   4*8(%rdi)               /* regs->cs */
+       pushq   3*8(%rdi)               /* regs->ip */
+       pushq   2*8(%rdi)               /* regs->orig_ax */
+       pushq   8(%rdi)                 /* return address */
+       UNWIND_HINT_FUNC
+
+       movq    (%rdi), %rdi
 1:
 
-       PUSH_AND_CLEAR_REGS
-       ENCODE_FRAME_POINTER
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
 
-       testb   $3, CS(%rsp)
+       testb   $3, CS+8(%rsp)
        jz      1f
 
        /*
@@ -548,7 +604,7 @@ END(irq_entries_start)
         *
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
-        * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
+        * (which can take locks).  Since TRACE_IRQS_OFF is idempotent,
         * the simplest way to handle it is to just call it twice if
         * we enter from user mode.  There's no reason to optimize this since
         * TRACE_IRQS_OFF is a no-op if lockdep is off.
@@ -558,12 +614,15 @@ END(irq_entries_start)
        CALL_enter_from_user_mode
 
 1:
-       ENTER_IRQ_STACK old_rsp=%rdi
+       ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
        /* We entered an interrupt context - irqs are off: */
        TRACE_IRQS_OFF
 
-       call    \func   /* rdi points to pt_regs */
-       .endm
+       ret
+END(interrupt_entry)
+
+
+/* Interrupt entry/exit. */
 
        /*
         * The interrupt stubs push (~vector+0x80) onto the stack and
@@ -571,9 +630,10 @@ END(irq_entries_start)
         */
        .p2align CONFIG_X86_L1_CACHE_SHIFT
 common_interrupt:
-       ASM_CLAC
        addq    $-0x80, (%rsp)                  /* Adjust vector to [-256, -1] range */
-       interrupt do_IRQ
+       call    interrupt_entry
+       UNWIND_HINT_REGS indirect=1
+       call    do_IRQ  /* rdi points to pt_regs */
        /* 0(%rsp): old RSP */
 ret_from_intr:
        DISABLE_INTERRUPTS(CLBR_ANY)
@@ -766,10 +826,11 @@ END(common_interrupt)
 .macro apicinterrupt3 num sym do_sym
 ENTRY(\sym)
        UNWIND_HINT_IRET_REGS
-       ASM_CLAC
        pushq   $~(\num)
 .Lcommon_\sym:
-       interrupt \do_sym
+       call    interrupt_entry
+       UNWIND_HINT_REGS indirect=1
+       call    \do_sym /* rdi points to pt_regs */
        jmp     ret_from_intr
 END(\sym)
 .endm
@@ -832,34 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
 
-/*
- * Switch to the thread stack.  This is called with the IRET frame and
- * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
- * space has not been allocated for them.)
- */
-ENTRY(switch_to_thread_stack)
-       UNWIND_HINT_FUNC
-
-       pushq   %rdi
-       /* Need to switch before accessing the thread stack. */
-       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
-       movq    %rsp, %rdi
-       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-
-       pushq   7*8(%rdi)               /* regs->ss */
-       pushq   6*8(%rdi)               /* regs->rsp */
-       pushq   5*8(%rdi)               /* regs->eflags */
-       pushq   4*8(%rdi)               /* regs->cs */
-       pushq   3*8(%rdi)               /* regs->ip */
-       pushq   2*8(%rdi)               /* regs->orig_ax */
-       pushq   8(%rdi)                 /* return address */
-       UNWIND_HINT_FUNC
-
-       movq    (%rdi), %rdi
-       ret
-END(switch_to_thread_stack)
-
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
        UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -875,12 +908,8 @@ ENTRY(\sym)
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
        .endif
 
-       /* Save all registers in pt_regs */
-       PUSH_AND_CLEAR_REGS
-       ENCODE_FRAME_POINTER
-
        .if \paranoid < 2
-       testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
+       testb   $3, CS-ORIG_RAX(%rsp)           /* If coming from userspace, switch stacks */
        jnz     .Lfrom_usermode_switch_stack_\@
        .endif
 
@@ -1130,13 +1159,15 @@ idtentry machine_check          do_mce                  has_error_code=0        paranoid=1
 #endif
 
 /*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
        UNWIND_HINT_FUNC
        cld
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
        movl    $1, %ebx
        movl    $MSR_GS_BASE, %ecx
        rdmsr
@@ -1181,12 +1212,14 @@ ENTRY(paranoid_exit)
 END(paranoid_exit)
 
 /*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch GS if needed.
  * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
-       UNWIND_HINT_REGS offset=8
+       UNWIND_HINT_FUNC
        cld
+       PUSH_AND_CLEAR_REGS save_ret=1
+       ENCODE_FRAME_POINTER 8
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
 
@@ -1577,8 +1610,6 @@ end_repeat_nmi:
         * frame to point back to repeat_nmi.
         */
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
-       PUSH_AND_CLEAR_REGS
-       ENCODE_FRAME_POINTER
 
        /*
         * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
index fd65e01..e811dd9 100644 (file)
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
-       xorq    %r8, %r8                /* nospec   r8 */
+       xorl    %r8d, %r8d              /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
-       xorq    %r9, %r9                /* nospec   r9 */
+       xorl    %r9d, %r9d              /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
-       xorq    %r10, %r10              /* nospec   r10 */
+       xorl    %r10d, %r10d            /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
-       xorq    %r11, %r11              /* nospec   r11 */
+       xorl    %r11d, %r11d            /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
        xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
        xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
-       xorq    %r12, %r12              /* nospec   r12 */
+       xorl    %r12d, %r12d            /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
-       xorq    %r13, %r13              /* nospec   r13 */
+       xorl    %r13d, %r13d            /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
-       xorq    %r14, %r14              /* nospec   r14 */
+       xorl    %r14d, %r14d            /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
-       xorq    %r15, %r15              /* nospec   r15 */
+       xorl    %r15d, %r15d            /* nospec   r15 */
        cld
 
        /*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
        pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
-       xorq    %r8, %r8                /* nospec   r8 */
+       xorl    %r8d, %r8d              /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
-       xorq    %r9, %r9                /* nospec   r9 */
+       xorl    %r9d, %r9d              /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
-       xorq    %r10, %r10              /* nospec   r10 */
+       xorl    %r10d, %r10d            /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
-       xorq    %r11, %r11              /* nospec   r11 */
+       xorl    %r11d, %r11d            /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
        xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
        xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
-       xorq    %r12, %r12              /* nospec   r12 */
+       xorl    %r12d, %r12d            /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
-       xorq    %r13, %r13              /* nospec   r13 */
+       xorl    %r13d, %r13d            /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
-       xorq    %r14, %r14              /* nospec   r14 */
+       xorl    %r14d, %r14d            /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
-       xorq    %r15, %r15              /* nospec   r15 */
+       xorl    %r15d, %r15d            /* nospec   r15 */
 
        /*
         * User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
         */
        SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
 
-       xorq    %r8, %r8
-       xorq    %r9, %r9
-       xorq    %r10, %r10
+       xorl    %r8d, %r8d
+       xorl    %r9d, %r9d
+       xorl    %r10d, %r10d
        swapgs
        sysretl
 END(entry_SYSCALL_compat)
@@ -347,10 +347,23 @@ ENTRY(entry_INT80_compat)
         */
        movl    %eax, %eax
 
+       /* switch to thread stack expects orig_ax and rdi to be pushed */
        pushq   %rax                    /* pt_regs->orig_ax */
+       pushq   %rdi                    /* pt_regs->di */
+
+       /* Need to switch before accessing the thread stack. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+       pushq   6*8(%rdi)               /* regs->ss */
+       pushq   5*8(%rdi)               /* regs->rsp */
+       pushq   4*8(%rdi)               /* regs->eflags */
+       pushq   3*8(%rdi)               /* regs->cs */
+       pushq   2*8(%rdi)               /* regs->ip */
+       pushq   1*8(%rdi)               /* regs->orig_ax */
 
-       /* switch to thread stack expects orig_ax to be pushed */
-       call    switch_to_thread_stack
+       movq    (%rdi), %rdi            /* restore %rdi */
 
        pushq   %rdi                    /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
@@ -358,25 +371,25 @@ ENTRY(entry_INT80_compat)
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
-       xorq    %r8, %r8                /* nospec   r8 */
+       xorl    %r8d, %r8d              /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
-       xorq    %r9, %r9                /* nospec   r9 */
+       xorl    %r9d, %r9d              /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
-       xorq    %r10, %r10              /* nospec   r10 */
+       xorl    %r10d, %r10d            /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
-       xorq    %r11, %r11              /* nospec   r11 */
+       xorl    %r11d, %r11d            /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
        xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp */
        xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   %r12                    /* pt_regs->r12 */
-       xorq    %r12, %r12              /* nospec   r12 */
+       xorl    %r12d, %r12d            /* nospec   r12 */
        pushq   %r13                    /* pt_regs->r13 */
-       xorq    %r13, %r13              /* nospec   r13 */
+       xorl    %r13d, %r13d            /* nospec   r13 */
        pushq   %r14                    /* pt_regs->r14 */
-       xorq    %r14, %r14              /* nospec   r14 */
+       xorl    %r14d, %r14d            /* nospec   r14 */
        pushq   %r15                    /* pt_regs->r15 */
-       xorq    %r15, %r15              /* nospec   r15 */
+       xorl    %r15d, %r15d            /* nospec   r15 */
        cld
 
        /*
index 4d4015d..c356098 100644 (file)
@@ -7,6 +7,8 @@
 #ifndef _ASM_X86_MACH_DEFAULT_APM_H
 #define _ASM_X86_MACH_DEFAULT_APM_H
 
+#include <asm/nospec-branch.h>
+
 #ifdef APM_ZERO_SEGS
 #      define APM_DO_ZERO_SEGS \
                "pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
         * N.B. We do NOT need a cld after the BIOS call
         * because we always save and restore the flags.
         */
+       firmware_restrict_branch_speculation_start();
        __asm__ __volatile__(APM_DO_ZERO_SEGS
                "pushl %%edi\n\t"
                "pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
                  "=S" (*esi)
                : "a" (func), "b" (ebx_in), "c" (ecx_in)
                : "memory", "cc");
+       firmware_restrict_branch_speculation_end();
 }
 
 static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
         * N.B. We do NOT need a cld after the BIOS call
         * because we always save and restore the flags.
         */
+       firmware_restrict_branch_speculation_start();
        __asm__ __volatile__(APM_DO_ZERO_SEGS
                "pushl %%edi\n\t"
                "pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
                  "=S" (si)
                : "a" (func), "b" (ebx_in), "c" (ecx_in)
                : "memory", "cc");
+       firmware_restrict_branch_speculation_end();
        return error;
 }
 
index 4d11161..1908214 100644 (file)
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
 #endif /* CONFIG_RETPOLINE */
index 0dfe4d3..f41079d 100644 (file)
 #define X86_FEATURE_SEV                        ( 7*32+20) /* AMD Secure Encrypted Virtualization */
 
 #define X86_FEATURE_USE_IBPB           ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW                ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW         ( 8*32+ 0) /* Intel TPR Shadow */
index 85f6ccb..a399c1e 100644 (file)
@@ -6,6 +6,7 @@
 #include <asm/pgtable.h>
 #include <asm/processor-flags.h>
 #include <asm/tlb.h>
+#include <asm/nospec-branch.h>
 
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
 
 extern asmlinkage unsigned long efi_call_phys(void *, ...);
 
-#define arch_efi_call_virt_setup()     kernel_fpu_begin()
-#define arch_efi_call_virt_teardown()  kernel_fpu_end()
+#define arch_efi_call_virt_setup()                                     \
+({                                                                     \
+       kernel_fpu_begin();                                             \
+       firmware_restrict_branch_speculation_start();                   \
+})
+
+#define arch_efi_call_virt_teardown()                                  \
+({                                                                     \
+       firmware_restrict_branch_speculation_end();                     \
+       kernel_fpu_end();                                               \
+})
+
 
 /*
  * Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
        efi_sync_low_kernel_mappings();                                 \
        preempt_disable();                                              \
        __kernel_fpu_begin();                                           \
+       firmware_restrict_branch_speculation_start();                   \
                                                                        \
        if (efi_scratch.use_pgd) {                                      \
                efi_scratch.prev_cr3 = __read_cr3();                    \
@@ -91,6 +103,7 @@ struct efi_scratch {
                __flush_tlb_all();                                      \
        }                                                               \
                                                                        \
+       firmware_restrict_branch_speculation_end();                     \
        __kernel_fpu_end();                                             \
        preempt_enable();                                               \
 })
index dd6f57a..0a9e330 100644 (file)
@@ -1464,7 +1464,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define put_smstate(type, buf, offset, val)                      \
        *(type *)((buf) + (offset) - 0x7e00) = val
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-               unsigned long start, unsigned long end);
-
 #endif /* _ASM_X86_KVM_HOST_H */
index 55520ce..7fb1047 100644 (file)
@@ -37,7 +37,12 @@ struct cpu_signature {
 
 struct device;
 
-enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+enum ucode_state {
+       UCODE_OK        = 0,
+       UCODE_UPDATED,
+       UCODE_NFOUND,
+       UCODE_ERROR,
+};
 
 struct microcode_ops {
        enum ucode_state (*request_microcode_user) (int cpu,
@@ -54,7 +59,7 @@ struct microcode_ops {
         * are being called.
         * See also the "Synchronization" section in microcode_core.c.
         */
-       int (*apply_microcode) (int cpu);
+       enum ucode_state (*apply_microcode) (int cpu);
        int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 };
 
index c931b88..1de72ce 100644 (file)
@@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot)
        return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
 #else
        BUG();
+       return (void *)fix_to_virt(FIX_HOLE);
 #endif
 }
 
index 81a1be3..d0dabea 100644 (file)
@@ -8,6 +8,50 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS         16      /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)      \
+       mov     $(nr/2), reg;                   \
+771:                                           \
+       call    772f;                           \
+773:   /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     773b;                           \
+772:                                           \
+       call    774f;                           \
+775:   /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     775b;                           \
+774:                                           \
+       dec     reg;                            \
+       jnz     771b;                           \
+       add     $(BITS_PER_LONG/8) * nr, sp;
+
 #ifdef __ASSEMBLY__
 
 /*
 .endm
 
 /*
+ * This should be used immediately before an indirect jump/call. It tells
+ * objtool the subsequent indirect jump/call is vouched safe for retpoline
+ * builds.
+ */
+.macro ANNOTATE_RETPOLINE_SAFE
+       .Lannotate_\@:
+       .pushsection .discard.retpoline_safe
+       _ASM_PTR .Lannotate_\@
+       .popsection
+.endm
+
+/*
  * These are the bare retpoline primitives for indirect jmp and call.
  * Do not use these directly; they only exist to make the ALTERNATIVE
  * invocation below less ugly.
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
        ANNOTATE_NOSPEC_ALTERNATIVE
-       ALTERNATIVE_2 __stringify(jmp *\reg),                           \
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg),  \
                __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
-               __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+               __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
 #else
        jmp     *\reg
 #endif
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
        ANNOTATE_NOSPEC_ALTERNATIVE
-       ALTERNATIVE_2 __stringify(call *\reg),                          \
+       ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
                __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
-               __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+               __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
 #else
        call    *\reg
 #endif
 .endm
 
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+  * monstrosity above, manually.
+  */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
 #ifdef CONFIG_RETPOLINE
-       ALTERNATIVE "", "call __clear_rsb", \ftr
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE "jmp .Lskip_rsb_\@",                                \
+               __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))    \
+               \ftr
+.Lskip_rsb_\@:
 #endif
 .endm
 
        ".long 999b - .\n\t"                                    \
        ".popsection\n\t"
 
+#define ANNOTATE_RETPOLINE_SAFE                                        \
+       "999:\n\t"                                              \
+       ".pushsection .discard.retpoline_safe\n\t"              \
+       _ASM_PTR " 999b\n\t"                                    \
+       ".popsection\n\t"
+
 #if defined(CONFIG_X86_64) && defined(RETPOLINE)
 
 /*
 # define CALL_NOSPEC                                           \
        ANNOTATE_NOSPEC_ALTERNATIVE                             \
        ALTERNATIVE(                                            \
+       ANNOTATE_RETPOLINE_SAFE                                 \
        "call *%[thunk_target]\n",                              \
        "call __x86_indirect_thunk_%V[thunk_target]\n",         \
        X86_FEATURE_RETPOLINE)
@@ -156,26 +226,54 @@ extern char __indirect_thunk_end[];
 static inline void vmexit_fill_RSB(void)
 {
 #ifdef CONFIG_RETPOLINE
-       alternative_input("",
-                         "call __fill_rsb",
-                         X86_FEATURE_RETPOLINE,
-                         ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+       unsigned long loops;
+
+       asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+                     ALTERNATIVE("jmp 910f",
+                                 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+                                 X86_FEATURE_RETPOLINE)
+                     "910:"
+                     : "=r" (loops), ASM_CALL_CONSTRAINT
+                     : : "memory" );
 #endif
 }
 
+#define alternative_msr_write(_msr, _val, _feature)            \
+       asm volatile(ALTERNATIVE("",                            \
+                                "movl %[msr], %%ecx\n\t"       \
+                                "movl %[val], %%eax\n\t"       \
+                                "movl $0, %%edx\n\t"           \
+                                "wrmsr",                       \
+                                _feature)                      \
+                    : : [msr] "i" (_msr), [val] "i" (_val)     \
+                    : "eax", "ecx", "edx", "memory")
+
 static inline void indirect_branch_prediction_barrier(void)
 {
-       asm volatile(ALTERNATIVE("",
-                                "movl %[msr], %%ecx\n\t"
-                                "movl %[val], %%eax\n\t"
-                                "movl $0, %%edx\n\t"
-                                "wrmsr",
-                                X86_FEATURE_USE_IBPB)
-                    : : [msr] "i" (MSR_IA32_PRED_CMD),
-                        [val] "i" (PRED_CMD_IBPB)
-                    : "eax", "ecx", "edx", "memory");
+       alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+                             X86_FEATURE_USE_IBPB);
 }
 
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ *
+ * (Implemented as CPP macros due to header hell.)
+ */
+#define firmware_restrict_branch_speculation_start()                   \
+do {                                                                   \
+       preempt_disable();                                              \
+       alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,       \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+} while (0)
+
+#define firmware_restrict_branch_speculation_end()                     \
+do {                                                                   \
+       alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,                    \
+                             X86_FEATURE_USE_IBRS_FW);                 \
+       preempt_enable();                                               \
+} while (0)
+
 #endif /* __ASSEMBLY__ */
 
 /*
index 554841f..c83a2f4 100644 (file)
@@ -7,6 +7,7 @@
 #ifdef CONFIG_PARAVIRT
 #include <asm/pgtable_types.h>
 #include <asm/asm.h>
+#include <asm/nospec-branch.h>
 
 #include <asm/paravirt_types.h>
 
@@ -879,23 +880,27 @@ extern void default_banner(void);
 
 #define INTERRUPT_RETURN                                               \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,       \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
 
 #define DISABLE_INTERRUPTS(clobbers)                                   \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
                  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);    \
                  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
 #define ENABLE_INTERRUPTS(clobbers)                                    \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,  \
                  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);            \
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);     \
                  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
 #ifdef CONFIG_X86_32
 #define GET_CR0_INTO_EAX                               \
        push %ecx; push %edx;                           \
+       ANNOTATE_RETPOLINE_SAFE;                                \
        call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
        pop %edx; pop %ecx
 #else  /* !CONFIG_X86_32 */
@@ -917,21 +922,25 @@ extern void default_banner(void);
  */
 #define SWAPGS                                                         \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
-                 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)          \
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
+                 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);         \
                 )
 
 #define GET_CR2_INTO_RAX                               \
-       call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
+       ANNOTATE_RETPOLINE_SAFE;                                \
+       call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
 
 #define USERGS_SYSRET64                                                        \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+                 ANNOTATE_RETPOLINE_SAFE;                                      \
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
 
 #ifdef CONFIG_DEBUG_ENTRY
 #define SAVE_FLAGS(clobbers)                                        \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
                  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+                 ANNOTATE_RETPOLINE_SAFE;                                  \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
                  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 #endif
index f624f1f..180bc0b 100644 (file)
@@ -43,6 +43,7 @@
 #include <asm/desc_defs.h>
 #include <asm/kmap_types.h>
 #include <asm/pgtable_types.h>
+#include <asm/nospec-branch.h>
 
 struct page;
 struct thread_struct;
@@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
  * offset into the paravirt_patch_template structure, and can therefore be
  * freely converted back into a structure offset.
  */
-#define PARAVIRT_CALL  "call *%c[paravirt_opptr];"
+#define PARAVIRT_CALL                                  \
+       ANNOTATE_RETPOLINE_SAFE                         \
+       "call *%c[paravirt_opptr];"
 
 /*
  * These macros are intended to wrap calls through one of the paravirt
index 63c2552..b444d83 100644 (file)
@@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
 {
        pmdval_t v = native_pmd_val(pmd);
 
-       return __pmd(v | set);
+       return native_make_pmd(v | set);
 }
 
 static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
 {
        pmdval_t v = native_pmd_val(pmd);
 
-       return __pmd(v & ~clear);
+       return native_make_pmd(v & ~clear);
 }
 
 static inline pmd_t pmd_mkold(pmd_t pmd)
@@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
 {
        pudval_t v = native_pud_val(pud);
 
-       return __pud(v | set);
+       return native_make_pud(v | set);
 }
 
 static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
 {
        pudval_t v = native_pud_val(pud);
 
-       return __pud(v & ~clear);
+       return native_make_pud(v & ~clear);
 }
 
 static inline pud_t pud_mkold(pud_t pud)
index 3696398..246f15b 100644 (file)
@@ -323,6 +323,11 @@ static inline pudval_t native_pud_val(pud_t pud)
 #else
 #include <asm-generic/pgtable-nopud.h>
 
+static inline pud_t native_make_pud(pudval_t val)
+{
+       return (pud_t) { .p4d.pgd = native_make_pgd(val) };
+}
+
 static inline pudval_t native_pud_val(pud_t pud)
 {
        return native_pgd_val(pud.p4d.pgd);
@@ -344,6 +349,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 #else
 #include <asm-generic/pgtable-nopmd.h>
 
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+       return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) };
+}
+
 static inline pmdval_t native_pmd_val(pmd_t pmd)
 {
        return native_pgd_val(pmd.pud.p4d.pgd);
index 1bd9ed8..b0ccd48 100644 (file)
@@ -977,4 +977,5 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 void df_debug(struct pt_regs *regs, long error_code);
+void microcode_check(void);
 #endif /* _ASM_X86_PROCESSOR_H */
index 4e44250..d651711 100644 (file)
@@ -67,13 +67,13 @@ static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
        GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
-                                 r->refs.counter, "er", i, "%0", e);
+                                 r->refs.counter, "er", i, "%0", e, "cx");
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
        GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
-                                r->refs.counter, "%0", e);
+                                r->refs.counter, "%0", e, "cx");
 }
 
 static __always_inline __must_check
index f91c365..4914a3e 100644 (file)
@@ -2,8 +2,7 @@
 #ifndef _ASM_X86_RMWcc
 #define _ASM_X86_RMWcc
 
-#define __CLOBBERS_MEM         "memory"
-#define __CLOBBERS_MEM_CC_CX   "memory", "cc", "cx"
+#define __CLOBBERS_MEM(clb...) "memory", ## clb
 
 #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
 
@@ -40,18 +39,19 @@ do {                                                                        \
 #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 #define GEN_UNARY_RMWcc(op, var, arg0, cc)                             \
-       __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+       __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
 
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc)            \
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
        __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc,                 \
-                   __CLOBBERS_MEM_CC_CX)
+                   __CLOBBERS_MEM(clobbers))
 
 #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)                 \
        __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc,                \
-                   __CLOBBERS_MEM, vcon (val))
+                   __CLOBBERS_MEM(), vcon (val))
 
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc)        \
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc,        \
+                                 clobbers...)                          \
        __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc,  \
-                   __CLOBBERS_MEM_CC_CX, vcon (val))
+                   __CLOBBERS_MEM(clobbers), vcon (val))
 
 #endif /* _ASM_X86_RMWcc */
index 7a2ade4..6cfa9c8 100644 (file)
@@ -26,6 +26,7 @@
 #define KVM_FEATURE_PV_EOI             6
 #define KVM_FEATURE_PV_UNHALT          7
 #define KVM_FEATURE_PV_TLB_FLUSH       9
+#define KVM_FEATURE_ASYNC_PF_VMEXIT    10
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
index 8ad2e41..7c55387 100644 (file)
@@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void)
        do {
                rep_nop();
                now = rdtsc();
-       } while ((now - start) < 40000000000UL / HZ &&
+       } while ((now - start) < 40000000000ULL / HZ &&
                time_before_eq(jiffies, end));
 }
 
index d71c8b5..bfca937 100644 (file)
@@ -300,6 +300,15 @@ retpoline_auto:
                setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
                pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
        }
+
+       /*
+        * Retpoline means the kernel is safe because it has no indirect
+        * branches. But firmware isn't, so use IBRS to protect that.
+        */
+       if (boot_cpu_has(X86_FEATURE_IBRS)) {
+               setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+               pr_info("Enabling Restricted Speculation for firmware calls\n");
+       }
 }
 
 #undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
                return sprintf(buf, "Not affected\n");
 
-       return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+       return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
                       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+                      boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
                       spectre_v2_module_string());
 }
 #endif
index 824aee0..348cf48 100644 (file)
@@ -1749,3 +1749,33 @@ static int __init init_cpu_syscore(void)
        return 0;
 }
 core_initcall(init_cpu_syscore);
+
+/*
+ * The microcode loader calls this upon late microcode load to recheck features,
+ * only when microcode has been updated. Caller holds microcode_mutex and CPU
+ * hotplug lock.
+ */
+void microcode_check(void)
+{
+       struct cpuinfo_x86 info;
+
+       perf_check_microcode();
+
+       /* Reload CPUID max function as it might've changed. */
+       info.cpuid_level = cpuid_eax(0);
+
+       /*
+        * Copy all capability leafs to pick up the synthetic ones so that
+        * memcmp() below doesn't fail on that. The ones coming from CPUID will
+        * get overwritten in get_cpu_cap().
+        */
+       memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
+
+       get_cpu_cap(&info);
+
+       if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
+               return;
+
+       pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
+       pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
+}
index 330b846..a998e1a 100644 (file)
@@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
        return patch_size;
 }
 
-static int apply_microcode_amd(int cpu)
+static enum ucode_state apply_microcode_amd(int cpu)
 {
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        struct microcode_amd *mc_amd;
@@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
 
        p = find_patch(cpu);
        if (!p)
-               return 0;
+               return UCODE_NFOUND;
 
        mc_amd  = p->data;
        uci->mc = p->data;
@@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
        if (rev >= mc_amd->hdr.patch_id) {
                c->microcode = rev;
                uci->cpu_sig.rev = rev;
-               return 0;
+               return UCODE_OK;
        }
 
        if (__apply_microcode_amd(mc_amd)) {
                pr_err("CPU%d: update failed for patch_level=0x%08x\n",
                        cpu, mc_amd->hdr.patch_id);
-               return -1;
+               return UCODE_ERROR;
        }
        pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
                mc_amd->hdr.patch_id);
@@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
        uci->cpu_sig.rev = mc_amd->hdr.patch_id;
        c->microcode = mc_amd->hdr.patch_id;
 
-       return 0;
+       return UCODE_UPDATED;
 }
 
 static int install_equiv_cpu_table(const u8 *buf)
index 319dd65..aa1b9a4 100644 (file)
@@ -374,7 +374,7 @@ static int collect_cpu_info(int cpu)
 }
 
 struct apply_microcode_ctx {
-       int err;
+       enum ucode_state err;
 };
 
 static void apply_microcode_local(void *arg)
@@ -489,31 +489,30 @@ static void __exit microcode_dev_exit(void)
 /* fake device for request_firmware */
 static struct platform_device  *microcode_pdev;
 
-static int reload_for_cpu(int cpu)
+static enum ucode_state reload_for_cpu(int cpu)
 {
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
        enum ucode_state ustate;
-       int err = 0;
 
        if (!uci->valid)
-               return err;
+               return UCODE_OK;
 
        ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
-       if (ustate == UCODE_OK)
-               apply_microcode_on_target(cpu);
-       else
-               if (ustate == UCODE_ERROR)
-                       err = -EINVAL;
-       return err;
+       if (ustate != UCODE_OK)
+               return ustate;
+
+       return apply_microcode_on_target(cpu);
 }
 
 static ssize_t reload_store(struct device *dev,
                            struct device_attribute *attr,
                            const char *buf, size_t size)
 {
+       enum ucode_state tmp_ret = UCODE_OK;
+       bool do_callback = false;
        unsigned long val;
+       ssize_t ret = 0;
        int cpu;
-       ssize_t ret = 0, tmp_ret;
 
        ret = kstrtoul(buf, 0, &val);
        if (ret)
@@ -526,15 +525,21 @@ static ssize_t reload_store(struct device *dev,
        mutex_lock(&microcode_mutex);
        for_each_online_cpu(cpu) {
                tmp_ret = reload_for_cpu(cpu);
-               if (tmp_ret != 0)
+               if (tmp_ret > UCODE_NFOUND) {
                        pr_warn("Error reloading microcode on CPU %d\n", cpu);
 
-               /* save retval of the first encountered reload error */
-               if (!ret)
-                       ret = tmp_ret;
+                       /* set retval for the first encountered reload error */
+                       if (!ret)
+                               ret = -EINVAL;
+               }
+
+               if (tmp_ret == UCODE_UPDATED)
+                       do_callback = true;
        }
-       if (!ret)
-               perf_check_microcode();
+
+       if (!ret && do_callback)
+               microcode_check();
+
        mutex_unlock(&microcode_mutex);
        put_online_cpus();
 
index a15db2b..923054a 100644 (file)
@@ -772,7 +772,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
        return 0;
 }
 
-static int apply_microcode_intel(int cpu)
+static enum ucode_state apply_microcode_intel(int cpu)
 {
        struct microcode_intel *mc;
        struct ucode_cpu_info *uci;
@@ -782,7 +782,7 @@ static int apply_microcode_intel(int cpu)
 
        /* We should bind the task to the CPU */
        if (WARN_ON(raw_smp_processor_id() != cpu))
-               return -1;
+               return UCODE_ERROR;
 
        uci = ucode_cpu_info + cpu;
        mc = uci->mc;
@@ -790,7 +790,7 @@ static int apply_microcode_intel(int cpu)
                /* Look for a newer patch in our cache: */
                mc = find_patch(uci);
                if (!mc)
-                       return 0;
+                       return UCODE_NFOUND;
        }
 
        /* write microcode via MSR 0x79 */
@@ -801,7 +801,7 @@ static int apply_microcode_intel(int cpu)
        if (rev != mc->hdr.rev) {
                pr_err("CPU%d update to revision 0x%x failed\n",
                       cpu, mc->hdr.rev);
-               return -1;
+               return UCODE_ERROR;
        }
 
        if (rev != prev_rev) {
@@ -818,7 +818,7 @@ static int apply_microcode_intel(int cpu)
        uci->cpu_sig.rev = rev;
        c->microcode = rev;
 
-       return 0;
+       return UCODE_UPDATED;
 }
 
 static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
index 04a625f..0f545b3 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/nops.h>
 #include "../entry/calling.h"
 #include <asm/export.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/asm-offsets.h>
@@ -134,6 +135,7 @@ ENTRY(secondary_startup_64)
 
        /* Ensure I am executing from virtual addresses */
        movq    $1f, %rax
+       ANNOTATE_RETPOLINE_SAFE
        jmp     *%rax
 1:
        UNWIND_HINT_EMPTY
index 4e37d1a..bc1a272 100644 (file)
@@ -49,7 +49,7 @@
 
 static int kvmapf = 1;
 
-static int parse_no_kvmapf(char *arg)
+static int __init parse_no_kvmapf(char *arg)
 {
         kvmapf = 0;
         return 0;
@@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
 early_param("no-kvmapf", parse_no_kvmapf);
 
 static int steal_acc = 1;
-static int parse_no_stealacc(char *arg)
+static int __init parse_no_stealacc(char *arg)
 {
         steal_acc = 0;
         return 0;
@@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
 early_param("no-steal-acc", parse_no_stealacc);
 
 static int kvmclock_vsyscall = 1;
-static int parse_no_kvmclock_vsyscall(char *arg)
+static int __init parse_no_kvmclock_vsyscall(char *arg)
 {
         kvmclock_vsyscall = 0;
         return 0;
@@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
 #endif
                pa |= KVM_ASYNC_PF_ENABLED;
 
-               /* Async page fault support for L1 hypervisor is optional */
-               if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
-                       (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
-                       wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
+               if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
+                       pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+
+               wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
                __this_cpu_write(apf_reason.enabled, 1);
                printk(KERN_INFO"KVM setup async PF for cpu %d\n",
                       smp_processor_id());
@@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)
                pv_time_ops.steal_clock = kvm_steal_clock;
        }
 
-       if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
+       if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+           !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
                pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
 
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)
 {
        int cpu;
 
-       if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
+       if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+           !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
                for_each_possible_cpu(cpu) {
                        zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
                                GFP_KERNEL, cpu_to_node(cpu));
index 1f9188f..feb28fe 100644 (file)
@@ -5,7 +5,6 @@
 #include <asm/unwind.h>
 #include <asm/orc_types.h>
 #include <asm/orc_lookup.h>
-#include <asm/sections.h>
 
 #define orc_warn(fmt, ...) \
        printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
@@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
        }
 
        /* vmlinux .init slow lookup: */
-       if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
+       if (init_kernel_text(ip))
                return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
                                  __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
 
index a0c5a69..b671fc2 100644 (file)
@@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                             (1 << KVM_FEATURE_PV_EOI) |
                             (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
                             (1 << KVM_FEATURE_PV_UNHALT) |
-                            (1 << KVM_FEATURE_PV_TLB_FLUSH);
+                            (1 << KVM_FEATURE_PV_TLB_FLUSH) |
+                            (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
 
                if (sched_info_on())
                        entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
index 924ac8c..cc5fe7a 100644 (file)
@@ -2165,7 +2165,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
         */
        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
-       kvm_lapic_reset(vcpu, false);
        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
        return 0;
index 46ff304..f551962 100644 (file)
@@ -3029,7 +3029,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
                return RET_PF_RETRY;
        }
 
-       return -EFAULT;
+       return RET_PF_EMULATE;
 }
 
 static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
index b3e488a..cbd7ab7 100644 (file)
@@ -49,6 +49,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 
 #include <asm/virtext.h>
@@ -300,6 +301,8 @@ module_param(vgif, int, 0444);
 static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
 module_param(sev, int, 0444);
 
+static u8 rsm_ins_bytes[] = "\x0f\xaa";
+
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -1383,6 +1386,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        set_intercept(svm, INTERCEPT_SKINIT);
        set_intercept(svm, INTERCEPT_WBINVD);
        set_intercept(svm, INTERCEPT_XSETBV);
+       set_intercept(svm, INTERCEPT_RSM);
 
        if (!kvm_mwait_in_guest()) {
                set_intercept(svm, INTERCEPT_MONITOR);
@@ -3699,6 +3703,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
        return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 }
 
+static int rsm_interception(struct vcpu_svm *svm)
+{
+       return x86_emulate_instruction(&svm->vcpu, 0, 0,
+                                      rsm_ins_bytes, 2) == EMULATE_DONE;
+}
+
 static int rdpmc_interception(struct vcpu_svm *svm)
 {
        int err;
@@ -4541,7 +4551,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
        [SVM_EXIT_NPF]                          = npf_interception,
-       [SVM_EXIT_RSM]                          = emulate_on_interception,
+       [SVM_EXIT_RSM]                          = rsm_interception,
        [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
        [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
 };
@@ -5355,7 +5365,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
         * being speculatively taken.
         */
        if (svm->spec_ctrl)
-               wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
 
        asm volatile (
                "push %%" _ASM_BP "; \n\t"
@@ -5464,11 +5474,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
         * If the L02 MSR bitmap does not intercept the MSR, then we need to
         * save it.
         */
-       if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-               rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+       if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+               svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
        if (svm->spec_ctrl)
-               wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
        /* Eliminate branch target predictions from guest mode */
        vmexit_fill_RSB();
@@ -6236,16 +6246,18 @@ e_free:
 
 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
+       void __user *measure = (void __user *)(uintptr_t)argp->data;
        struct kvm_sev_info *sev = &kvm->arch.sev_info;
        struct sev_data_launch_measure *data;
        struct kvm_sev_launch_measure params;
+       void __user *p = NULL;
        void *blob = NULL;
        int ret;
 
        if (!sev_guest(kvm))
                return -ENOTTY;
 
-       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+       if (copy_from_user(&params, measure, sizeof(params)))
                return -EFAULT;
 
        data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -6256,17 +6268,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (!params.len)
                goto cmd;
 
-       if (params.uaddr) {
+       p = (void __user *)(uintptr_t)params.uaddr;
+       if (p) {
                if (params.len > SEV_FW_BLOB_MAX_SIZE) {
                        ret = -EINVAL;
                        goto e_free;
                }
 
-               if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
-                       ret = -EFAULT;
-                       goto e_free;
-               }
-
                ret = -ENOMEM;
                blob = kmalloc(params.len, GFP_KERNEL);
                if (!blob)
@@ -6290,13 +6298,13 @@ cmd:
                goto e_free_blob;
 
        if (blob) {
-               if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+               if (copy_to_user(p, blob, params.len))
                        ret = -EFAULT;
        }
 
 done:
        params.len = data->len;
-       if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+       if (copy_to_user(measure, &params, sizeof(params)))
                ret = -EFAULT;
 e_free_blob:
        kfree(blob);
@@ -6597,7 +6605,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
        struct page **pages;
        void *blob, *hdr;
        unsigned long n;
-       int ret;
+       int ret, offset;
 
        if (!sev_guest(kvm))
                return -ENOTTY;
@@ -6623,6 +6631,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (!data)
                goto e_unpin_memory;
 
+       offset = params.guest_uaddr & (PAGE_SIZE - 1);
+       data->guest_address = __sme_page_pa(pages[0]) + offset;
+       data->guest_len = params.guest_len;
+
        blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
        if (IS_ERR(blob)) {
                ret = PTR_ERR(blob);
@@ -6637,8 +6649,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
                ret = PTR_ERR(hdr);
                goto e_free_blob;
        }
-       data->trans_address = __psp_pa(blob);
-       data->trans_len = params.trans_len;
+       data->hdr_address = __psp_pa(hdr);
+       data->hdr_len = params.hdr_len;
 
        data->handle = sev->handle;
        ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
index 3dec126..cab6ea1 100644 (file)
@@ -51,6 +51,7 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
+#include <asm/microcode.h>
 #include <asm/nospec-branch.h>
 
 #include "trace.h"
@@ -4485,7 +4486,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
                              SECONDARY_EXEC_DESC);
                hw_cr4 &= ~X86_CR4_UMIP;
-       } else
+       } else if (!is_guest_mode(vcpu) ||
+                  !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
                vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
                                SECONDARY_EXEC_DESC);
 
@@ -9452,7 +9454,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         * being speculatively taken.
         */
        if (vmx->spec_ctrl)
-               wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 
        vmx->__launched = vmx->loaded_vmcs->launched;
        asm(
@@ -9587,11 +9589,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         * If the L02 MSR bitmap does not intercept the MSR, then we need to
         * save it.
         */
-       if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
-               rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+       if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+               vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
        if (vmx->spec_ctrl)
-               wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+               native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
        /* Eliminate branch target predictions from guest mode */
        vmexit_fill_RSB();
@@ -11199,7 +11201,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        if (ret)
                return ret;
 
-       if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
+       /*
+        * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
+        * by event injection, halt vcpu.
+        */
+       if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
+           !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
                return kvm_vcpu_halt(vcpu);
 
        vmx->nested.nested_run_pending = 1;
index c8a0b54..96edda8 100644 (file)
@@ -7975,6 +7975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        kvm_vcpu_mtrr_init(vcpu);
        vcpu_load(vcpu);
        kvm_vcpu_reset(vcpu, false);
+       kvm_lapic_reset(vcpu, false);
        kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
        return 0;
@@ -8460,10 +8461,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
                        return r;
        }
 
-       if (!size) {
-               r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
-               WARN_ON(r < 0);
-       }
+       if (!size)
+               vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
 
        return 0;
 }
index 91e9700..25a972c 100644 (file)
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
index 480edc3..c909961 100644 (file)
@@ -7,7 +7,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
 
 .macro THUNK reg
        .section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
 GENERATE_THUNK(r14)
 GENERATE_THUNK(r15)
 #endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
-       mov     $(\nr / 2), %_ASM_BX
-       .align 16
-771:
-       call    772f
-773:                                           /* speculation trap */
-       pause
-       lfence
-       jmp     773b
-       .align 16
-772:
-       call    774f
-775:                                           /* speculation trap */
-       pause
-       lfence
-       jmp     775b
-       .align 16
-774:
-       dec     %_ASM_BX
-       jnz     771b
-       add     $((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS         16      /* To avoid underflow */
-
-ENTRY(__fill_rsb)
-       STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
-       ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
-       STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
-       ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
index 800de81..c88573d 100644 (file)
@@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
        tsk = current;
        mm = tsk->mm;
 
-       /*
-        * Detect and handle instructions that would cause a page fault for
-        * both a tracked kernel page and a userspace page.
-        */
        prefetchw(&mm->mmap_sem);
 
        if (unlikely(kmmio_fault(regs, address)))
index 01f682c..40a6085 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/page.h>
 #include <asm/processor-flags.h>
 #include <asm/msr-index.h>
+#include <asm/nospec-branch.h>
 
        .text
        .code64
@@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
        movq    %rax, %r8               /* Workarea encryption routine */
        addq    $PAGE_SIZE, %r8         /* Workarea intermediate copy buffer */
 
+       ANNOTATE_RETPOLINE_SAFE
        call    *%rax                   /* Call the encryption routine */
 
        pop     %r12
index de53bd1..24bb759 100644 (file)
@@ -102,7 +102,7 @@ ENTRY(startup_32)
         * don't we'll eventually crash trying to execute encrypted
         * instructions.
         */
-       bt      $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
+       btl     $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
        jnc     .Ldone
        movl    $MSR_K8_SYSCFG, %ecx
        rdmsr
index 4d46003..cdaeeea 100644 (file)
@@ -630,7 +630,7 @@ static int sysc_init_dts_quirks(struct sysc *ddata)
        for (i = 0; i < ARRAY_SIZE(sysc_dts_quirks); i++) {
                prop = of_get_property(np, sysc_dts_quirks[i].name, &len);
                if (!prop)
-                       break;
+                       continue;
 
                ddata->cfg.quirks |= sysc_dts_quirks[i].mask;
        }
index 4d1dc8b..f95b9c7 100644 (file)
@@ -457,7 +457,7 @@ static int st33zp24_recv(struct tpm_chip *chip, unsigned char *buf,
                            size_t count)
 {
        int size = 0;
-       int expected;
+       u32 expected;
 
        if (!chip)
                return -EBUSY;
@@ -474,7 +474,7 @@ static int st33zp24_recv(struct tpm_chip *chip, unsigned char *buf,
        }
 
        expected = be32_to_cpu(*(__be32 *)(buf + 2));
-       if (expected > count) {
+       if (expected > count || expected < TPM_HEADER_SIZE) {
                size = -EIO;
                goto out;
        }
index 76df4fb..9e80a95 100644 (file)
@@ -1190,6 +1190,10 @@ int tpm_get_random(struct tpm_chip *chip, u8 *out, size_t max)
                        break;
 
                recd = be32_to_cpu(tpm_cmd.params.getrandom_out.rng_data_len);
+               if (recd > num_bytes) {
+                       total = -EFAULT;
+                       break;
+               }
 
                rlength = be32_to_cpu(tpm_cmd.header.out.length);
                if (rlength < offsetof(struct tpm_getrandom_out, rng_data) +
index c17e753..a700f8f 100644 (file)
@@ -683,6 +683,10 @@ static int tpm2_unseal_cmd(struct tpm_chip *chip,
        if (!rc) {
                data_len = be16_to_cpup(
                        (__be16 *) &buf.data[TPM_HEADER_SIZE + 4]);
+               if (data_len < MIN_KEY_SIZE ||  data_len > MAX_KEY_SIZE + 1) {
+                       rc = -EFAULT;
+                       goto out;
+               }
 
                rlength = be32_to_cpu(((struct tpm2_cmd *)&buf)
                                        ->header.out.length);
index c1dd39e..6116cd0 100644 (file)
@@ -473,7 +473,8 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count)
 static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
        int size = 0;
-       int expected, status;
+       int status;
+       u32 expected;
 
        if (count < TPM_HEADER_SIZE) {
                size = -EIO;
@@ -488,7 +489,7 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count)
        }
 
        expected = be32_to_cpu(*(__be32 *)(buf + 2));
-       if ((size_t) expected > count) {
+       if (((size_t) expected > count) || (expected < TPM_HEADER_SIZE)) {
                size = -EIO;
                goto out;
        }
index c642877..caa86b1 100644 (file)
@@ -281,7 +281,11 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
        struct device *dev = chip->dev.parent;
        struct i2c_client *client = to_i2c_client(dev);
        s32 rc;
-       int expected, status, burst_count, retries, size = 0;
+       int status;
+       int burst_count;
+       int retries;
+       int size = 0;
+       u32 expected;
 
        if (count < TPM_HEADER_SIZE) {
                i2c_nuvoton_ready(chip);    /* return to idle */
@@ -323,7 +327,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
                 * to machine native
                 */
                expected = be32_to_cpu(*(__be32 *) (buf + 2));
-               if (expected > count) {
+               if (expected > count || expected < size) {
                        dev_err(dev, "%s() expected > count\n", __func__);
                        size = -EIO;
                        continue;
index 183a5f5..da074e3 100644 (file)
@@ -270,7 +270,8 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
        struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
        int size = 0;
-       int expected, status;
+       int status;
+       u32 expected;
 
        if (count < TPM_HEADER_SIZE) {
                size = -EIO;
@@ -285,7 +286,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
        }
 
        expected = be32_to_cpu(*(__be32 *) (buf + 2));
-       if (expected > count) {
+       if (expected > count || expected < TPM_HEADER_SIZE) {
                size = -EIO;
                goto out;
        }
index fcfa5b1..b3afb6c 100644 (file)
@@ -211,7 +211,7 @@ static int __sev_platform_shutdown_locked(int *error)
 {
        int ret;
 
-       ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error);
+       ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
        if (ret)
                return ret;
 
@@ -271,7 +271,7 @@ static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
                        return rc;
        }
 
-       return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error);
+       return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error);
 }
 
 static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
@@ -299,7 +299,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
                        return rc;
        }
 
-       return __sev_do_cmd_locked(cmd, 0, &argp->error);
+       return __sev_do_cmd_locked(cmd, NULL, &argp->error);
 }
 
 static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
@@ -624,7 +624,7 @@ EXPORT_SYMBOL_GPL(sev_guest_decommission);
 
 int sev_guest_df_flush(int *error)
 {
-       return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error);
+       return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error);
 }
 EXPORT_SYMBOL_GPL(sev_guest_df_flush);
 
index f34430f..8721002 100644 (file)
@@ -279,7 +279,7 @@ static const u32 correrrthrsld[] = {
  * sbridge structs
  */
 
-#define NUM_CHANNELS           4       /* Max channels per MC */
+#define NUM_CHANNELS           6       /* Max channels per MC */
 #define MAX_DIMMS              3       /* Max DIMMS per channel */
 #define KNL_MAX_CHAS           38      /* KNL max num. of Cache Home Agents */
 #define KNL_MAX_CHANNELS       6       /* KNL max num. of PCI channels */
index 564bb7a..84e5a9d 100644 (file)
@@ -241,6 +241,19 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
 
                desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx,
                                                &of_flags);
+               /*
+                * -EPROBE_DEFER in our case means that we found a
+                * valid GPIO property, but no controller has been
+                * registered so far.
+                *
+                * This means we don't need to look any further for
+                * alternate name conventions, and we should really
+                * preserve the return code for our user to be able to
+                * retry probing later.
+                */
+               if (IS_ERR(desc) && PTR_ERR(desc) == -EPROBE_DEFER)
+                       return desc;
+
                if (!IS_ERR(desc) || (PTR_ERR(desc) != -ENOENT))
                        break;
        }
@@ -250,7 +263,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
                desc = of_find_spi_gpio(dev, con_id, &of_flags);
 
        /* Special handling for regulator GPIOs if used */
-       if (IS_ERR(desc))
+       if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER)
                desc = of_find_regulator_gpio(dev, con_id, &of_flags);
 
        if (IS_ERR(desc))
index d5a2eef..74edba1 100644 (file)
@@ -1156,7 +1156,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 512      /* Reserve at most 512 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 128      /* Reserve at most 128 WB slots for amdgpu-owned rings. */
 
 struct amdgpu_wb {
        struct amdgpu_bo        *wb_obj;
index 00a50cc..af1b879 100644 (file)
@@ -492,7 +492,7 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
                memset(&adev->wb.used, 0, sizeof(adev->wb.used));
 
                /* clear wb memory */
-               memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t));
+               memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
        }
 
        return 0;
@@ -530,8 +530,9 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
  */
 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
 {
+       wb >>= 3;
        if (wb < adev->wb.num_wb)
-               __clear_bit(wb >> 3, adev->wb.used);
+               __clear_bit(wb, adev->wb.used);
 }
 
 /**
@@ -1455,11 +1456,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
                if (!adev->ip_blocks[i].status.hw)
                        continue;
-               if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
-                       amdgpu_free_static_csa(adev);
-                       amdgpu_device_wb_fini(adev);
-                       amdgpu_device_vram_scratch_fini(adev);
-               }
 
                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
                        adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
@@ -1486,6 +1482,13 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
                if (!adev->ip_blocks[i].status.sw)
                        continue;
+
+               if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+                       amdgpu_free_static_csa(adev);
+                       amdgpu_device_wb_fini(adev);
+                       amdgpu_device_vram_scratch_fini(adev);
+               }
+
                r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
                /* XXX handle errors */
                if (r) {
@@ -2284,14 +2287,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
                                drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
                        }
                        drm_modeset_unlock_all(dev);
-               } else {
-                       /*
-                        * There is no equivalent atomic helper to turn on
-                        * display, so we defined our own function for this,
-                        * once suspend resume is supported by the atomic
-                        * framework this will be reworked
-                        */
-                       amdgpu_dm_display_resume(adev);
                }
        }
 
@@ -2726,7 +2721,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
        if (amdgpu_device_has_dc_support(adev)) {
                if (drm_atomic_helper_resume(adev->ddev, state))
                        dev_info(adev->dev, "drm resume failed:%d\n", r);
-               amdgpu_dm_display_resume(adev);
        } else {
                drm_helper_resume_force_mode(adev->ddev);
        }
index e14ab34..7c2be32 100644 (file)
@@ -75,7 +75,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
 static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man)
 {
        struct amdgpu_gtt_mgr *mgr = man->priv;
-
+       spin_lock(&mgr->lock);
        drm_mm_takedown(&mgr->mm);
        spin_unlock(&mgr->lock);
        kfree(mgr);
index 56bcd59..36483e0 100644 (file)
@@ -257,7 +257,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
        r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
        if (r) {
                adev->irq.installed = false;
-               flush_work(&adev->hotplug_work);
+               if (!amdgpu_device_has_dc_support(adev))
+                       flush_work(&adev->hotplug_work);
                cancel_work_sync(&adev->reset_work);
                return r;
        }
@@ -282,7 +283,8 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
                adev->irq.installed = false;
                if (adev->irq.msi_enabled)
                        pci_disable_msi(adev->pdev);
-               flush_work(&adev->hotplug_work);
+               if (!amdgpu_device_has_dc_support(adev))
+                       flush_work(&adev->hotplug_work);
                cancel_work_sync(&adev->reset_work);
        }
 
index 2719937..3b7e7af 100644 (file)
@@ -634,7 +634,7 @@ static int gmc_v9_0_late_init(void *handle)
        for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
                BUG_ON(vm_inv_eng[i] > 16);
 
-       if (adev->asic_type == CHIP_VEGA10) {
+       if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
                r = gmc_v9_0_ecc_available(adev);
                if (r == 1) {
                        DRM_INFO("ECC is active.\n");
@@ -682,7 +682,10 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
        adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
        if (!adev->mc.vram_width) {
                /* hbm memory channel size */
-               chansize = 128;
+               if (adev->flags & AMD_IS_APU)
+                       chansize = 64;
+               else
+                       chansize = 128;
 
                tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
                tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
index e92fb37..91cf95a 100644 (file)
@@ -238,31 +238,27 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
 static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
-       u64 *wptr = NULL;
-       uint64_t local_wptr = 0;
+       u64 wptr;
 
        if (ring->use_doorbell) {
                /* XXX check if swapping is necessary on BE */
-               wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
-               DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
-               *wptr = (*wptr) >> 2;
-               DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
+               wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+               DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
        } else {
                u32 lowbit, highbit;
                int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
 
-               wptr = &local_wptr;
                lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2;
                highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
 
                DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
                                me, highbit, lowbit);
-               *wptr = highbit;
-               *wptr = (*wptr) << 32;
-               *wptr |= lowbit;
+               wptr = highbit;
+               wptr = wptr << 32;
+               wptr |= lowbit;
        }
 
-       return *wptr;
+       return wptr >> 2;
 }
 
 /**
index b2bfeda..9bab484 100644 (file)
@@ -1618,7 +1618,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
        .set_wptr = uvd_v6_0_enc_ring_set_wptr,
        .emit_frame_size =
                4 + /* uvd_v6_0_enc_ring_emit_pipeline_sync */
-               6 + /* uvd_v6_0_enc_ring_emit_vm_flush */
+               5 + /* uvd_v6_0_enc_ring_emit_vm_flush */
                5 + 5 + /* uvd_v6_0_enc_ring_emit_fence x2 vm fence */
                1, /* uvd_v6_0_enc_ring_insert_end */
        .emit_ib_size = 5, /* uvd_v6_0_enc_ring_emit_ib */
index 1ce4c98..862835d 100644 (file)
@@ -629,11 +629,13 @@ static int dm_resume(void *handle)
 {
        struct amdgpu_device *adev = handle;
        struct amdgpu_display_manager *dm = &adev->dm;
+       int ret = 0;
 
        /* power on hardware */
        dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
 
-       return 0;
+       ret = amdgpu_dm_display_resume(adev);
+       return ret;
 }
 
 int amdgpu_dm_display_resume(struct amdgpu_device *adev)
index 33d91e4..639421a 100644 (file)
@@ -1465,7 +1465,7 @@ void decide_link_settings(struct dc_stream_state *stream,
        /* MST doesn't perform link training for now
         * TODO: add MST specific link training routine
         */
-       if (is_mst_supported(link)) {
+       if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
                *link_setting = link->verified_link_cap;
                return;
        }
index 261811e..539c3e0 100644 (file)
@@ -197,7 +197,8 @@ bool dc_stream_set_cursor_attributes(
        for (i = 0; i < MAX_PIPES; i++) {
                struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
 
-               if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp))
+               if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.xfm &&
+                   !pipe_ctx->plane_res.dpp) || !pipe_ctx->plane_res.ipp)
                        continue;
                if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
                        continue;
@@ -273,7 +274,8 @@ bool dc_stream_set_cursor_position(
                if (pipe_ctx->stream != stream ||
                                (!pipe_ctx->plane_res.mi  && !pipe_ctx->plane_res.hubp) ||
                                !pipe_ctx->plane_state ||
-                               (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp))
+                               (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp) ||
+                               !pipe_ctx->plane_res.ipp)
                        continue;
 
                if (pipe_ctx->plane_state->address.type
index 41e42be..08e8a79 100644 (file)
@@ -2756,10 +2756,13 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
                                    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
 
 
-       disable_mclk_switching = ((1 < info.display_count) ||
-                                 disable_mclk_switching_for_frame_lock ||
-                                 smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
-                                 (mode_info.refresh_rate > 120));
+       if (info.display_count == 0)
+               disable_mclk_switching = false;
+       else
+               disable_mclk_switching = ((1 < info.display_count) ||
+                                         disable_mclk_switching_for_frame_lock ||
+                                         smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) ||
+                                         (mode_info.refresh_rate > 120));
 
        sclk = smu7_ps->performance_levels[0].engine_clock;
        mclk = smu7_ps->performance_levels[0].memory_clock;
@@ -4534,13 +4537,6 @@ static int smu7_set_power_profile_state(struct pp_hwmgr *hwmgr,
        int tmp_result, result = 0;
        uint32_t sclk_mask = 0, mclk_mask = 0;
 
-       if (hwmgr->chip_id == CHIP_FIJI) {
-               if (request->type == AMD_PP_GFX_PROFILE)
-                       smu7_enable_power_containment(hwmgr);
-               else if (request->type == AMD_PP_COMPUTE_PROFILE)
-                       smu7_disable_power_containment(hwmgr);
-       }
-
        if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_AUTO)
                return -EINVAL;
 
index 2d55dab..5f9c3ef 100644 (file)
@@ -3168,10 +3168,13 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
        disable_mclk_switching_for_vr = PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR);
        force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh);
 
-       disable_mclk_switching = (info.display_count > 1) ||
-                                   disable_mclk_switching_for_frame_lock ||
-                                   disable_mclk_switching_for_vr ||
-                                   force_mclk_high;
+       if (info.display_count == 0)
+               disable_mclk_switching = false;
+       else
+               disable_mclk_switching = (info.display_count > 1) ||
+                       disable_mclk_switching_for_frame_lock ||
+                       disable_mclk_switching_for_vr ||
+                       force_mclk_high;
 
        sclk = vega10_ps->performance_levels[0].gfx_clock;
        mclk = vega10_ps->performance_levels[0].mem_clock;
index 5a13ff2..c0530a1 100644 (file)
@@ -121,6 +121,10 @@ int drm_mode_addfb(struct drm_device *dev,
        r.pixel_format = drm_mode_legacy_fb_format(or->bpp, or->depth);
        r.handles[0] = or->handle;
 
+       if (r.pixel_format == DRM_FORMAT_XRGB2101010 &&
+           dev->driver->driver_features & DRIVER_PREFER_XBGR_30BPP)
+               r.pixel_format = DRM_FORMAT_XBGR2101010;
+
        ret = drm_mode_addfb2(dev, &r, file_priv);
        if (ret)
                return ret;
index 4401068..3ab1ace 100644 (file)
@@ -505,6 +505,8 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
                list_add_tail(&vma->exec_link, &eb->unbound);
                if (drm_mm_node_allocated(&vma->node))
                        err = i915_vma_unbind(vma);
+               if (unlikely(err))
+                       vma->exec_flags = NULL;
        }
        return err;
 }
@@ -2410,7 +2412,7 @@ err_request:
        if (out_fence) {
                if (err == 0) {
                        fd_install(out_fence_fd, out_fence->file);
-                       args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
+                       args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
                        args->rsvd2 |= (u64)out_fence_fd << 32;
                        out_fence_fd = -1;
                } else {
index e09d18d..a3e93d4 100644 (file)
@@ -476,8 +476,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
        GEM_BUG_ON(!irqs_disabled());
        lockdep_assert_held(&engine->timeline->lock);
 
-       trace_i915_gem_request_execute(request);
-
        /* Transfer from per-context onto the global per-engine timeline */
        timeline = engine->timeline;
        GEM_BUG_ON(timeline == request->timeline);
@@ -501,6 +499,8 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
        list_move_tail(&request->link, &timeline->requests);
        spin_unlock(&request->timeline->lock);
 
+       trace_i915_gem_request_execute(request);
+
        wake_up_all(&request->execute);
 }
 
index a2108e3..33eb0c5 100644 (file)
@@ -2027,7 +2027,7 @@ enum i915_power_well_id {
 #define _CNL_PORT_TX_DW5_LN0_AE                0x162454
 #define _CNL_PORT_TX_DW5_LN0_B         0x162654
 #define _CNL_PORT_TX_DW5_LN0_C         0x162C54
-#define _CNL_PORT_TX_DW5_LN0_D         0x162ED4
+#define _CNL_PORT_TX_DW5_LN0_D         0x162E54
 #define _CNL_PORT_TX_DW5_LN0_F         0x162854
 #define CNL_PORT_TX_DW5_GRP(port)      _MMIO_PORT6(port, \
                                                    _CNL_PORT_TX_DW5_GRP_AE, \
@@ -2058,7 +2058,7 @@ enum i915_power_well_id {
 #define _CNL_PORT_TX_DW7_LN0_AE                0x16245C
 #define _CNL_PORT_TX_DW7_LN0_B         0x16265C
 #define _CNL_PORT_TX_DW7_LN0_C         0x162C5C
-#define _CNL_PORT_TX_DW7_LN0_D         0x162EDC
+#define _CNL_PORT_TX_DW7_LN0_D         0x162E5C
 #define _CNL_PORT_TX_DW7_LN0_F         0x16285C
 #define CNL_PORT_TX_DW7_GRP(port)      _MMIO_PORT6(port, \
                                                    _CNL_PORT_TX_DW7_GRP_AE, \
index 522d54f..4a01f62 100644 (file)
@@ -779,11 +779,11 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *dev_priv,
 {
        struct intel_encoder *encoder;
 
-       if (WARN_ON(pipe >= ARRAY_SIZE(dev_priv->av_enc_map)))
-               return NULL;
-
        /* MST */
        if (pipe >= 0) {
+               if (WARN_ON(pipe >= ARRAY_SIZE(dev_priv->av_enc_map)))
+                       return NULL;
+
                encoder = dev_priv->av_enc_map[pipe];
                /*
                 * when bootup, audio driver may not know it is
index dd8d435..caddce8 100644 (file)
@@ -4477,6 +4477,7 @@ nv50_display_create(struct drm_device *dev)
        nouveau_display(dev)->fini = nv50_display_fini;
        disp->disp = &nouveau_display(dev)->disp;
        dev->mode_config.funcs = &nv50_disp_func;
+       dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP;
        if (nouveau_atomic)
                dev->driver->driver_features |= DRIVER_ATOMIC;
 
index 8d3e3d2..7828a5e 100644 (file)
@@ -1365,6 +1365,10 @@ int radeon_device_init(struct radeon_device *rdev,
        if ((rdev->flags & RADEON_IS_PCI) &&
            (rdev->family <= CHIP_RS740))
                rdev->need_dma32 = true;
+#ifdef CONFIG_PPC64
+       if (rdev->family == CHIP_CEDAR)
+               rdev->need_dma32 = true;
+#endif
 
        dma_bits = rdev->need_dma32 ? 32 : 40;
        r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
index 326ad06..4b65425 100644 (file)
@@ -47,7 +47,6 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev);
 static bool radeon_pm_debug_check_in_vbl(struct radeon_device *rdev, bool finish);
 static void radeon_pm_update_profile(struct radeon_device *rdev);
 static void radeon_pm_set_clocks(struct radeon_device *rdev);
-static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev);
 
 int radeon_pm_get_type_index(struct radeon_device *rdev,
                             enum radeon_pm_state_type ps_type,
@@ -80,8 +79,6 @@ void radeon_pm_acpi_event_handler(struct radeon_device *rdev)
                                radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power);
                }
                mutex_unlock(&rdev->pm.mutex);
-               /* allow new DPM state to be picked */
-               radeon_pm_compute_clocks_dpm(rdev);
        } else if (rdev->pm.pm_method == PM_METHOD_PROFILE) {
                if (rdev->pm.profile == PM_PROFILE_AUTO) {
                        mutex_lock(&rdev->pm.mutex);
@@ -885,8 +882,7 @@ static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev,
                dpm_state = POWER_STATE_TYPE_INTERNAL_3DPERF;
        /* balanced states don't exist at the moment */
        if (dpm_state == POWER_STATE_TYPE_BALANCED)
-               dpm_state = rdev->pm.dpm.ac_power ?
-                       POWER_STATE_TYPE_PERFORMANCE : POWER_STATE_TYPE_BATTERY;
+               dpm_state = POWER_STATE_TYPE_PERFORMANCE;
 
 restart_search:
        /* Pick the best power state based on current conditions */
index 3c15cf2..b396011 100644 (file)
@@ -260,7 +260,7 @@ static void sun4i_tcon0_mode_set_common(struct sun4i_tcon *tcon,
                                        const struct drm_display_mode *mode)
 {
        /* Configure the dot clock */
-       clk_set_rate(tcon->dclk, mode->crtc_clock * 1000);
+       clk_set_rate_exclusive(tcon->dclk, mode->crtc_clock * 1000);
 
        /* Set the resolution */
        regmap_write(tcon->regs, SUN4I_TCON0_BASIC0_REG,
@@ -335,6 +335,9 @@ static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon,
        regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG,
                           SUN4I_TCON_GCTL_IOMAP_MASK,
                           SUN4I_TCON_GCTL_IOMAP_TCON0);
+
+       /* Enable the output on the pins */
+       regmap_write(tcon->regs, SUN4I_TCON0_IO_TRI_REG, 0xe0000000);
 }
 
 static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
@@ -418,7 +421,7 @@ static void sun4i_tcon1_mode_set(struct sun4i_tcon *tcon,
        WARN_ON(!tcon->quirks->has_channel_1);
 
        /* Configure the dot clock */
-       clk_set_rate(tcon->sclk1, mode->crtc_clock * 1000);
+       clk_set_rate_exclusive(tcon->sclk1, mode->crtc_clock * 1000);
 
        /* Adjust clock delay */
        clk_delay = sun4i_tcon_get_clk_delay(mode, 1);
index 5720a0d..677ac16 100644 (file)
@@ -197,6 +197,9 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data,
        case VIRTGPU_PARAM_3D_FEATURES:
                value = vgdev->has_virgl_3d == true ? 1 : 0;
                break;
+       case VIRTGPU_PARAM_CAPSET_QUERY_FIX:
+               value = 1;
+               break;
        default:
                return -EINVAL;
        }
@@ -472,7 +475,7 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
 {
        struct virtio_gpu_device *vgdev = dev->dev_private;
        struct drm_virtgpu_get_caps *args = data;
-       int size;
+       unsigned size, host_caps_size;
        int i;
        int found_valid = -1;
        int ret;
@@ -481,6 +484,10 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
        if (vgdev->num_capsets == 0)
                return -ENOSYS;
 
+       /* don't allow userspace to pass 0 */
+       if (args->size == 0)
+               return -EINVAL;
+
        spin_lock(&vgdev->display_info_lock);
        for (i = 0; i < vgdev->num_capsets; i++) {
                if (vgdev->capsets[i].id == args->cap_set_id) {
@@ -496,11 +503,9 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
                return -EINVAL;
        }
 
-       size = vgdev->capsets[found_valid].max_size;
-       if (args->size > size) {
-               spin_unlock(&vgdev->display_info_lock);
-               return -EINVAL;
-       }
+       host_caps_size = vgdev->capsets[found_valid].max_size;
+       /* only copy to user the minimum of the host caps size or the guest caps size */
+       size = min(args->size, host_caps_size);
 
        list_for_each_entry(cache_ent, &vgdev->cap_cache, head) {
                if (cache_ent->id == args->cap_set_id &&
index e40065b..0a7e99d 100644 (file)
@@ -157,7 +157,7 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
                seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
        }
        rcu_read_unlock();
-       seq_printf (seq, "]");
+       seq_putc(seq, ']');
 }
 
 static int multipath_congested(struct mddev *mddev, int bits)
index bc67ab6..254e44e 100644 (file)
@@ -801,6 +801,9 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
        struct bio *bio;
        int ff = 0;
 
+       if (!page)
+               return;
+
        if (test_bit(Faulty, &rdev->flags))
                return;
 
@@ -5452,6 +5455,7 @@ int md_run(struct mddev *mddev)
         * the only valid external interface is through the md
         * device.
         */
+       mddev->has_superblocks = false;
        rdev_for_each(rdev, mddev) {
                if (test_bit(Faulty, &rdev->flags))
                        continue;
@@ -5465,6 +5469,9 @@ int md_run(struct mddev *mddev)
                                set_disk_ro(mddev->gendisk, 1);
                }
 
+               if (rdev->sb_page)
+                       mddev->has_superblocks = true;
+
                /* perform some consistency tests on the device.
                 * We don't want the data to overlap the metadata,
                 * Internal Bitmap issues have been handled elsewhere.
@@ -5497,8 +5504,10 @@ int md_run(struct mddev *mddev)
        }
        if (mddev->sync_set == NULL) {
                mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-               if (!mddev->sync_set)
-                       return -ENOMEM;
+               if (!mddev->sync_set) {
+                       err = -ENOMEM;
+                       goto abort;
+               }
        }
 
        spin_lock(&pers_lock);
@@ -5511,7 +5520,8 @@ int md_run(struct mddev *mddev)
                else
                        pr_warn("md: personality for level %s is not loaded!\n",
                                mddev->clevel);
-               return -EINVAL;
+               err = -EINVAL;
+               goto abort;
        }
        spin_unlock(&pers_lock);
        if (mddev->level != pers->level) {
@@ -5524,7 +5534,8 @@ int md_run(struct mddev *mddev)
            pers->start_reshape == NULL) {
                /* This personality cannot handle reshaping... */
                module_put(pers->owner);
-               return -EINVAL;
+               err = -EINVAL;
+               goto abort;
        }
 
        if (pers->sync_request) {
@@ -5593,7 +5604,7 @@ int md_run(struct mddev *mddev)
                mddev->private = NULL;
                module_put(pers->owner);
                bitmap_destroy(mddev);
-               return err;
+               goto abort;
        }
        if (mddev->queue) {
                bool nonrot = true;
@@ -5655,6 +5666,18 @@ int md_run(struct mddev *mddev)
        sysfs_notify_dirent_safe(mddev->sysfs_action);
        sysfs_notify(&mddev->kobj, NULL, "degraded");
        return 0;
+
+abort:
+       if (mddev->bio_set) {
+               bioset_free(mddev->bio_set);
+               mddev->bio_set = NULL;
+       }
+       if (mddev->sync_set) {
+               bioset_free(mddev->sync_set);
+               mddev->sync_set = NULL;
+       }
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
 
@@ -8049,6 +8072,7 @@ EXPORT_SYMBOL(md_done_sync);
 bool md_write_start(struct mddev *mddev, struct bio *bi)
 {
        int did_change = 0;
+
        if (bio_data_dir(bi) != WRITE)
                return true;
 
@@ -8081,6 +8105,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
        rcu_read_unlock();
        if (did_change)
                sysfs_notify_dirent_safe(mddev->sysfs_state);
+       if (!mddev->has_superblocks)
+               return true;
        wait_event(mddev->sb_wait,
                   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
                   mddev->suspended);
@@ -8543,6 +8569,19 @@ void md_do_sync(struct md_thread *thread)
        set_mask_bits(&mddev->sb_flags, 0,
                      BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
 
+       if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+                       !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+                       mddev->delta_disks > 0 &&
+                       mddev->pers->finish_reshape &&
+                       mddev->pers->size &&
+                       mddev->queue) {
+               mddev_lock_nointr(mddev);
+               md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
+               mddev_unlock(mddev);
+               set_capacity(mddev->gendisk, mddev->array_sectors);
+               revalidate_disk(mddev->gendisk);
+       }
+
        spin_lock(&mddev->lock);
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
                /* We completed so min/max setting can be forgotten if used. */
@@ -8569,6 +8608,10 @@ static int remove_and_add_spares(struct mddev *mddev,
        int removed = 0;
        bool remove_some = false;
 
+       if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+               /* Mustn't remove devices when resync thread is running */
+               return 0;
+
        rdev_for_each(rdev, mddev) {
                if ((this == NULL || rdev == this) &&
                    rdev->raid_disk >= 0 &&
index 58cd20a..fbc925c 100644 (file)
@@ -468,6 +468,8 @@ struct mddev {
        void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
        struct md_cluster_info          *cluster_info;
        unsigned int                    good_device_nr; /* good device num within cluster raid */
+
+       bool    has_superblocks:1;
 };
 
 enum recovery_flags {
index f978edd..fe872dc 100644 (file)
@@ -1809,6 +1809,17 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                        struct md_rdev *repl =
                                conf->mirrors[conf->raid_disks + number].rdev;
                        freeze_array(conf, 0);
+                       if (atomic_read(&repl->nr_pending)) {
+                               /* It means that some queued IO of retry_list
+                                * hold repl. Thus, we cannot set replacement
+                                * as NULL, avoiding rdev NULL pointer
+                                * dereference in sync_request_write and
+                                * handle_write_finished.
+                                */
+                               err = -EBUSY;
+                               unfreeze_array(conf);
+                               goto abort;
+                       }
                        clear_bit(Replacement, &repl->flags);
                        p->rdev = repl;
                        conf->mirrors[conf->raid_disks + number].rdev = NULL;
index c7294e7..eb84bc6 100644 (file)
 #define BARRIER_BUCKETS_NR_BITS                (PAGE_SHIFT - ilog2(sizeof(atomic_t)))
 #define BARRIER_BUCKETS_NR             (1<<BARRIER_BUCKETS_NR_BITS)
 
+/* Note: raid1_info.rdev can be set to NULL asynchronously by raid1_remove_disk.
+ * There are three safe ways to access raid1_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery is known to be happening - i.e. in code that is
+ *    called as part of performing resync/recovery.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ *    and if it is non-NULL, increment rdev->nr_pending before dropping the
+ *    RCU lock.
+ * When .rdev is set to NULL, the nr_pending count is checked again and if it
+ * has been incremented, the pointer is put back in .rdev.
+ */
+
 struct raid1_info {
        struct md_rdev  *rdev;
        sector_t        head_position;
index 99c9207..c5e6c60 100644 (file)
@@ -141,7 +141,7 @@ static void r10bio_pool_free(void *r10_bio, void *data)
 #define RESYNC_WINDOW (1024*1024)
 /* maximum number of concurrent requests, memory permitting */
 #define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
-#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
+#define CLUSTER_RESYNC_WINDOW (32 * RESYNC_WINDOW)
 #define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
 
 /*
@@ -2655,7 +2655,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                for (m = 0; m < conf->copies; m++) {
                        int dev = r10_bio->devs[m].devnum;
                        rdev = conf->mirrors[dev].rdev;
-                       if (r10_bio->devs[m].bio == NULL)
+                       if (r10_bio->devs[m].bio == NULL ||
+                               r10_bio->devs[m].bio->bi_end_io == NULL)
                                continue;
                        if (!r10_bio->devs[m].bio->bi_status) {
                                rdev_clear_badblocks(
@@ -2670,7 +2671,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                                        md_error(conf->mddev, rdev);
                        }
                        rdev = conf->mirrors[dev].replacement;
-                       if (r10_bio->devs[m].repl_bio == NULL)
+                       if (r10_bio->devs[m].repl_bio == NULL ||
+                               r10_bio->devs[m].repl_bio->bi_end_io == NULL)
                                continue;
 
                        if (!r10_bio->devs[m].repl_bio->bi_status) {
@@ -3782,7 +3784,7 @@ static int raid10_run(struct mddev *mddev)
                if (fc > 1 || fo > 0) {
                        pr_err("only near layout is supported by clustered"
                                " raid10\n");
-                       goto out;
+                       goto out_free_conf;
                }
        }
 
@@ -4830,17 +4832,11 @@ static void raid10_finish_reshape(struct mddev *mddev)
                return;
 
        if (mddev->delta_disks > 0) {
-               sector_t size = raid10_size(mddev, 0, 0);
-               md_set_array_sectors(mddev, size);
                if (mddev->recovery_cp > mddev->resync_max_sectors) {
                        mddev->recovery_cp = mddev->resync_max_sectors;
                        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                }
-               mddev->resync_max_sectors = size;
-               if (mddev->queue) {
-                       set_capacity(mddev->gendisk, mddev->array_sectors);
-                       revalidate_disk(mddev->gendisk);
-               }
+               mddev->resync_max_sectors = mddev->array_sectors;
        } else {
                int d;
                rcu_read_lock();
index db2ac22..e2e8840 100644 (file)
@@ -2,6 +2,19 @@
 #ifndef _RAID10_H
 #define _RAID10_H
 
+/* Note: raid10_info.rdev can be set to NULL asynchronously by
+ * raid10_remove_disk.
+ * There are three safe ways to access raid10_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
+ *    that is called as part of performing resync/recovery/reshape.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ *    and if it is non-NULL, increment rdev->nr_pending before dropping the
+ *    RCU lock.
+ * When .rdev is set to NULL, the nr_pending count is checked again and if it
+ * has been incremented, the pointer is put back in .rdev.
+ */
+
 struct raid10_info {
        struct md_rdev  *rdev, *replacement;
        sector_t        head_position;
index 0c76bce..a001808 100644 (file)
@@ -44,6 +44,7 @@ extern void ppl_write_stripe_run(struct r5conf *conf);
 extern void ppl_stripe_write_finished(struct stripe_head *sh);
 extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
 extern void ppl_quiesce(struct r5conf *conf, int quiesce);
+extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
 
 static inline bool raid5_has_ppl(struct r5conf *conf)
 {
@@ -104,7 +105,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
        if (conf->log)
                ret = r5l_handle_flush_request(conf->log, bio);
        else if (raid5_has_ppl(conf))
-               ret = 0;
+               ret = ppl_handle_flush_request(conf->log, bio);
 
        return ret;
 }
index 2764c22..42890a0 100644 (file)
@@ -693,6 +693,16 @@ void ppl_quiesce(struct r5conf *conf, int quiesce)
        }
 }
 
+int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio)
+{
+       if (bio->bi_iter.bi_size == 0) {
+               bio_endio(bio);
+               return 0;
+       }
+       bio->bi_opf &= ~REQ_PREFLUSH;
+       return -EAGAIN;
+}
+
 void ppl_stripe_write_finished(struct stripe_head *sh)
 {
        struct ppl_io_unit *io;
index 50d0114..b5d2601 100644 (file)
@@ -2196,15 +2196,16 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 static int grow_stripes(struct r5conf *conf, int num)
 {
        struct kmem_cache *sc;
+       size_t namelen = sizeof(conf->cache_name[0]);
        int devs = max(conf->raid_disks, conf->previous_raid_disks);
 
        if (conf->mddev->gendisk)
-               sprintf(conf->cache_name[0],
+               snprintf(conf->cache_name[0], namelen,
                        "raid%d-%s", conf->level, mdname(conf->mddev));
        else
-               sprintf(conf->cache_name[0],
+               snprintf(conf->cache_name[0], namelen,
                        "raid%d-%p", conf->level, conf->mddev);
-       sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+       snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
 
        conf->active_name = 0;
        sc = kmem_cache_create(conf->cache_name[conf->active_name],
@@ -6764,9 +6765,7 @@ static void free_conf(struct r5conf *conf)
 
        log_exit(conf);
 
-       if (conf->shrinker.nr_deferred)
-               unregister_shrinker(&conf->shrinker);
-
+       unregister_shrinker(&conf->shrinker);
        free_thread_groups(conf);
        shrink_stripes(conf);
        raid5_free_percpu(conf);
@@ -8001,13 +8000,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
 
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 
-               if (mddev->delta_disks > 0) {
-                       md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
-                       if (mddev->queue) {
-                               set_capacity(mddev->gendisk, mddev->array_sectors);
-                               revalidate_disk(mddev->gendisk);
-                       }
-               } else {
+               if (mddev->delta_disks <= 0) {
                        int d;
                        spin_lock_irq(&conf->device_lock);
                        mddev->degraded = raid5_calc_degraded(conf);
index 2e61238..3f8da26 100644 (file)
@@ -450,6 +450,18 @@ enum {
  * HANDLE gets cleared if stripe_handle leaves nothing locked.
  */
 
+/* Note: disk_info.rdev can be set to NULL asynchronously by raid5_remove_disk.
+ * There are three safe ways to access disk_info.rdev.
+ * 1/ when holding mddev->reconfig_mutex
+ * 2/ when resync/recovery/reshape is known to be happening - i.e. in code that
+ *    is called as part of performing resync/recovery/reshape.
+ * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
+ *    and if it is non-NULL, increment rdev->nr_pending before dropping the RCU
+ *    lock.
+ * When .rdev is set to NULL, the nr_pending count is checked again and if
+ * it has been incremented, the pointer is put back in .rdev.
+ */
+
 struct disk_info {
        struct md_rdev  *rdev, *replacement;
        struct page     *extra_page; /* extra page to use in prexor */
index 0a7bdbe..e9c1485 100644 (file)
 #define REG_TO_DCPU_MBOX       0x10
 #define REG_TO_HOST_MBOX       0x14
 
+/* Macros to process offsets returned by the DCPU */
+#define DRAM_MSG_ADDR_OFFSET   0x0
+#define DRAM_MSG_TYPE_OFFSET   0x1c
+#define DRAM_MSG_ADDR_MASK     ((1UL << DRAM_MSG_TYPE_OFFSET) - 1)
+#define DRAM_MSG_TYPE_MASK     ((1UL << \
+                                (BITS_PER_LONG - DRAM_MSG_TYPE_OFFSET)) - 1)
+
 /* Message RAM */
-#define DCPU_MSG_RAM(x)                (0x100 + (x) * sizeof(u32))
+#define DCPU_MSG_RAM_START     0x100
+#define DCPU_MSG_RAM(x)                (DCPU_MSG_RAM_START + (x) * sizeof(u32))
 
 /* DRAM Info Offsets & Masks */
 #define DRAM_INFO_INTERVAL     0x0
@@ -255,6 +263,40 @@ static unsigned int get_msg_chksum(const u32 msg[])
        return sum;
 }
 
+static void __iomem *get_msg_ptr(struct private_data *priv, u32 response,
+                                char *buf, ssize_t *size)
+{
+       unsigned int msg_type;
+       unsigned int offset;
+       void __iomem *ptr = NULL;
+
+       msg_type = (response >> DRAM_MSG_TYPE_OFFSET) & DRAM_MSG_TYPE_MASK;
+       offset = (response >> DRAM_MSG_ADDR_OFFSET) & DRAM_MSG_ADDR_MASK;
+
+       /*
+        * msg_type == 1: the offset is relative to the message RAM
+        * msg_type == 0: the offset is relative to the data RAM (this is the
+        *                previous way of passing data)
+        * msg_type is anything else: there's a critical hardware problem
+        */
+       switch (msg_type) {
+       case 1:
+               ptr = priv->regs + DCPU_MSG_RAM_START + offset;
+               break;
+       case 0:
+               ptr = priv->dmem + offset;
+               break;
+       default:
+               dev_emerg(priv->dev, "invalid message reply from DCPU: %#x\n",
+                       response);
+               if (buf && size)
+                       *size = sprintf(buf,
+                               "FATAL: communication error with DCPU\n");
+       }
+
+       return ptr;
+}
+
 static int __send_command(struct private_data *priv, unsigned int cmd,
                          u32 result[])
 {
@@ -507,7 +549,7 @@ static ssize_t show_info(struct device *dev, struct device_attribute *devattr,
 {
        u32 response[MSG_FIELD_MAX];
        unsigned int info;
-       int ret;
+       ssize_t ret;
 
        ret = generic_show(DPFE_CMD_GET_INFO, response, dev, buf);
        if (ret)
@@ -528,18 +570,19 @@ static ssize_t show_refresh(struct device *dev,
        u32 response[MSG_FIELD_MAX];
        void __iomem *info;
        struct private_data *priv;
-       unsigned int offset;
        u8 refresh, sr_abort, ppre, thermal_offs, tuf;
        u32 mr4;
-       int ret;
+       ssize_t ret;
 
        ret = generic_show(DPFE_CMD_GET_REFRESH, response, dev, buf);
        if (ret)
                return ret;
 
        priv = dev_get_drvdata(dev);
-       offset = response[MSG_ARG0];
-       info = priv->dmem + offset;
+
+       info = get_msg_ptr(priv, response[MSG_ARG0], buf, &ret);
+       if (!info)
+               return ret;
 
        mr4 = readl_relaxed(info + DRAM_INFO_MR4) & DRAM_INFO_MR4_MASK;
 
@@ -561,7 +604,6 @@ static ssize_t store_refresh(struct device *dev, struct device_attribute *attr,
        u32 response[MSG_FIELD_MAX];
        struct private_data *priv;
        void __iomem *info;
-       unsigned int offset;
        unsigned long val;
        int ret;
 
@@ -574,8 +616,10 @@ static ssize_t store_refresh(struct device *dev, struct device_attribute *attr,
        if (ret)
                return ret;
 
-       offset = response[MSG_ARG0];
-       info = priv->dmem + offset;
+       info = get_msg_ptr(priv, response[MSG_ARG0], NULL, NULL);
+       if (!info)
+               return -EIO;
+
        writel_relaxed(val, info + DRAM_INFO_INTERVAL);
 
        return count;
@@ -587,23 +631,25 @@ static ssize_t show_vendor(struct device *dev, struct device_attribute *devattr,
        u32 response[MSG_FIELD_MAX];
        struct private_data *priv;
        void __iomem *info;
-       unsigned int offset;
-       int ret;
+       ssize_t ret;
 
        ret = generic_show(DPFE_CMD_GET_VENDOR, response, dev, buf);
        if (ret)
                return ret;
 
-       offset = response[MSG_ARG0];
        priv = dev_get_drvdata(dev);
-       info = priv->dmem + offset;
+
+       info = get_msg_ptr(priv, response[MSG_ARG0], buf, &ret);
+       if (!info)
+               return ret;
 
        return sprintf(buf, "%#x %#x %#x %#x %#x\n",
                       readl_relaxed(info + DRAM_VENDOR_MR5) & DRAM_VENDOR_MASK,
                       readl_relaxed(info + DRAM_VENDOR_MR6) & DRAM_VENDOR_MASK,
                       readl_relaxed(info + DRAM_VENDOR_MR7) & DRAM_VENDOR_MASK,
                       readl_relaxed(info + DRAM_VENDOR_MR8) & DRAM_VENDOR_MASK,
-                      readl_relaxed(info + DRAM_VENDOR_ERROR));
+                      readl_relaxed(info + DRAM_VENDOR_ERROR) &
+                                    DRAM_VENDOR_MASK);
 }
 
 static int brcmstb_dpfe_resume(struct platform_device *pdev)
index 1fda9d6..4b91ff7 100644 (file)
@@ -716,7 +716,7 @@ static const char * const uart_b_groups[] = {
        "uart_tx_b_x", "uart_rx_b_x", "uart_cts_b_x", "uart_rts_b_x",
 };
 
-static const char * const uart_ao_b_gpioz_groups[] = {
+static const char * const uart_ao_b_z_groups[] = {
        "uart_ao_tx_b_z", "uart_ao_rx_b_z",
        "uart_ao_cts_b_z", "uart_ao_rts_b_z",
 };
@@ -855,7 +855,7 @@ static struct meson_pmx_func meson_axg_periphs_functions[] = {
        FUNCTION(nand),
        FUNCTION(uart_a),
        FUNCTION(uart_b),
-       FUNCTION(uart_ao_b_gpioz),
+       FUNCTION(uart_ao_b_z),
        FUNCTION(i2c0),
        FUNCTION(i2c1),
        FUNCTION(i2c2),
index d1a0131..5e3df19 100644 (file)
@@ -376,6 +376,7 @@ static int intel_hid_remove(struct platform_device *device)
 {
        acpi_handle handle = ACPI_HANDLE(&device->dev);
 
+       device_init_wakeup(&device->dev, false);
        acpi_remove_notify_handler(handle, ACPI_DEVICE_NOTIFY, notify_handler);
        intel_hid_set_enable(&device->dev, false);
        intel_button_array_enable(&device->dev, false);
index b703d6f..c13780b 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <linux/input.h>
 #include <linux/input/sparse-keymap.h>
 #include <linux/kernel.h>
@@ -97,9 +98,35 @@ out_unknown:
        dev_dbg(&device->dev, "unknown event index 0x%x\n", event);
 }
 
-static int intel_vbtn_probe(struct platform_device *device)
+static void detect_tablet_mode(struct platform_device *device)
 {
+       const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);
+       struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev);
+       acpi_handle handle = ACPI_HANDLE(&device->dev);
        struct acpi_buffer vgbs_output = { ACPI_ALLOCATE_BUFFER, NULL };
+       union acpi_object *obj;
+       acpi_status status;
+       int m;
+
+       if (!(chassis_type && strcmp(chassis_type, "31") == 0))
+               goto out;
+
+       status = acpi_evaluate_object(handle, "VGBS", NULL, &vgbs_output);
+       if (ACPI_FAILURE(status))
+               goto out;
+
+       obj = vgbs_output.pointer;
+       if (!(obj && obj->type == ACPI_TYPE_INTEGER))
+               goto out;
+
+       m = !(obj->integer.value & TABLET_MODE_FLAG);
+       input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
+out:
+       kfree(vgbs_output.pointer);
+}
+
+static int intel_vbtn_probe(struct platform_device *device)
+{
        acpi_handle handle = ACPI_HANDLE(&device->dev);
        struct intel_vbtn_priv *priv;
        acpi_status status;
@@ -122,22 +149,7 @@ static int intel_vbtn_probe(struct platform_device *device)
                return err;
        }
 
-       /*
-        * VGBS being present and returning something means we have
-        * a tablet mode switch.
-        */
-       status = acpi_evaluate_object(handle, "VGBS", NULL, &vgbs_output);
-       if (ACPI_SUCCESS(status)) {
-               union acpi_object *obj = vgbs_output.pointer;
-
-               if (obj && obj->type == ACPI_TYPE_INTEGER) {
-                       int m = !(obj->integer.value & TABLET_MODE_FLAG);
-
-                       input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
-               }
-       }
-
-       kfree(vgbs_output.pointer);
+       detect_tablet_mode(device);
 
        status = acpi_install_notify_handler(handle,
                                             ACPI_DEVICE_NOTIFY,
@@ -154,6 +166,7 @@ static int intel_vbtn_remove(struct platform_device *device)
 {
        acpi_handle handle = ACPI_HANDLE(&device->dev);
 
+       device_init_wakeup(&device->dev, false);
        acpi_remove_notify_handler(handle, ACPI_DEVICE_NOTIFY, notify_handler);
 
        /*
index c0c8945..8796211 100644 (file)
@@ -945,7 +945,7 @@ static int wmi_dev_probe(struct device *dev)
                wblock->char_dev.mode = 0444;
                ret = misc_register(&wblock->char_dev);
                if (ret) {
-                       dev_warn(dev, "failed to register char dev: %d", ret);
+                       dev_warn(dev, "failed to register char dev: %d\n", ret);
                        ret = -ENOMEM;
                        goto probe_misc_failure;
                }
@@ -1048,7 +1048,7 @@ static int wmi_create_device(struct device *wmi_bus_dev,
 
        if (result) {
                dev_warn(wmi_bus_dev,
-                        "%s data block query control method not found",
+                        "%s data block query control method not found\n",
                         method);
                return result;
        }
@@ -1198,7 +1198,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device)
 
                retval = device_add(&wblock->dev.dev);
                if (retval) {
-                       dev_err(wmi_bus_dev, "failed to register %pULL\n",
+                       dev_err(wmi_bus_dev, "failed to register %pUL\n",
                                wblock->gblock.guid);
                        if (debug_event)
                                wmi_method_enable(wblock, 0);
index cfb42f5..750f931 100644 (file)
@@ -470,13 +470,21 @@ static int imx_gpc_probe(struct platform_device *pdev)
 
 static int imx_gpc_remove(struct platform_device *pdev)
 {
+       struct device_node *pgc_node;
        int ret;
 
+       pgc_node = of_get_child_by_name(pdev->dev.of_node, "pgc");
+
+       /* bail out if DT too old and doesn't provide the necessary info */
+       if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells") &&
+           !pgc_node)
+               return 0;
+
        /*
         * If the old DT binding is used the toplevel driver needs to
         * de-register the power domains
         */
-       if (!of_get_child_by_name(pdev->dev.of_node, "pgc")) {
+       if (!pgc_node) {
                of_genpd_del_provider(pdev->dev.of_node);
 
                ret = pm_genpd_remove(&imx_gpc_domains[GPC_PGC_DOMAIN_PU].base);
index fd97552..05c66e0 100644 (file)
@@ -767,7 +767,7 @@ int
 xfs_scrub_agfl(
        struct xfs_scrub_context        *sc)
 {
-       struct xfs_scrub_agfl_info      sai = { 0 };
+       struct xfs_scrub_agfl_info      sai;
        struct xfs_agf                  *agf;
        xfs_agnumber_t                  agno;
        unsigned int                    agflcount;
@@ -795,6 +795,7 @@ xfs_scrub_agfl(
                xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
                goto out;
        }
+       memset(&sai, 0, sizeof(sai));
        sai.sz_entries = agflcount;
        sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS);
        if (!sai.entries) {
index 3a55d6f..7a39f40 100644 (file)
@@ -23,6 +23,7 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
+#include "xfs_shared.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_trans.h"
@@ -456,10 +457,12 @@ xfs_cui_recover(
         * transaction.  Normally, any work that needs to be deferred
         * gets attached to the same defer_ops that scheduled the
         * refcount update.  However, we're in log recovery here, so we
-        * we create our own defer_ops and use that to finish up any
-        * work that doesn't fit.
+        * use the passed in defer_ops to finish up any work that
+        * doesn't fit.  We need to reserve enough blocks to handle a
+        * full btree split on either end of the refcount range.
         */
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+                       mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
        if (error)
                return error;
        cudp = xfs_trans_get_cud(tp, cuip);
index f3b139c..49d3124 100644 (file)
@@ -23,6 +23,7 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
+#include "xfs_shared.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_trans.h"
@@ -470,7 +471,8 @@ xfs_rui_recover(
                }
        }
 
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+                       mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp);
        if (error)
                return error;
        rudp = xfs_trans_get_rud(tp, ruip);
index 7aba628..93588ea 100644 (file)
@@ -250,6 +250,7 @@ xfs_parseargs(
                                return -EINVAL;
                        break;
                case Opt_logdev:
+                       kfree(mp->m_logname);
                        mp->m_logname = match_strdup(args);
                        if (!mp->m_logname)
                                return -ENOMEM;
@@ -258,6 +259,7 @@ xfs_parseargs(
                        xfs_warn(mp, "%s option not allowed on this system", p);
                        return -EINVAL;
                case Opt_rtdev:
+                       kfree(mp->m_rtname);
                        mp->m_rtname = match_strdup(args);
                        if (!mp->m_rtname)
                                return -ENOMEM;
index d32b688..d23dcdd 100644 (file)
@@ -56,6 +56,7 @@ struct drm_printer;
 #define DRIVER_ATOMIC                  0x10000
 #define DRIVER_KMS_LEGACY_CONTEXT      0x20000
 #define DRIVER_SYNCOBJ                  0x40000
+#define DRIVER_PREFER_XBGR_30BPP        0x80000
 
 /**
  * struct drm_driver - DRM driver structure
index d02a4df..d3f264a 100644 (file)
@@ -27,3 +27,8 @@
 #if __has_feature(address_sanitizer)
 #define __SANITIZE_ADDRESS__
 #endif
+
+/* Clang doesn't have a way to turn it off per-function, yet. */
+#ifdef __noretpoline
+#undef __noretpoline
+#endif
index 901c1cc..e2c7f43 100644 (file)
 #define __weak         __attribute__((weak))
 #define __alias(symbol)        __attribute__((alias(#symbol)))
 
+#ifdef RETPOLINE
+#define __noretpoline __attribute__((indirect_branch("keep")))
+#endif
+
 /*
  * it doesn't make sense on ARM (currently the only user of __naked)
  * to trace naked functions because then mcount is called without
index 506a981..bc27cf0 100644 (file)
@@ -6,10 +6,10 @@
 #include <linux/types.h>
 
 /* Built-in __init functions needn't be compiled with retpoline */
-#if defined(RETPOLINE) && !defined(MODULE)
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#if defined(__noretpoline) && !defined(MODULE)
+#define __noinitretpoline __noretpoline
 #else
-#define __noretpoline
+#define __noinitretpoline
 #endif
 
 /* These macros are used to mark some functions or 
@@ -47,7 +47,7 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init         __section(.init.text) __cold  __latent_entropy __noretpoline
+#define __init         __section(.init.text) __cold  __latent_entropy __noinitretpoline
 #define __initdata     __section(.init.data)
 #define __initconst    __section(.init.rodata)
 #define __exitdata     __section(.exit.data)
index b6a29c1..2168cc6 100644 (file)
@@ -151,6 +151,7 @@ extern struct jump_entry __start___jump_table[];
 extern struct jump_entry __stop___jump_table[];
 
 extern void jump_label_init(void);
+extern void jump_label_invalidate_init(void);
 extern void jump_label_lock(void);
 extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
@@ -198,6 +199,8 @@ static __always_inline void jump_label_init(void)
        static_key_initialized = true;
 }
 
+static inline void jump_label_invalidate_init(void) {}
+
 static __always_inline bool static_key_false(struct static_key *key)
 {
        if (unlikely(static_key_count(key) > 0))
index ce51455..3fd2915 100644 (file)
@@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option);
 extern char *next_arg(char *args, char **param, char **val);
 
 extern int core_kernel_text(unsigned long addr);
+extern int init_kernel_text(unsigned long addr);
 extern int core_kernel_data(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
 extern int kernel_text_address(unsigned long addr);
index ac0062b..6930c63 100644 (file)
@@ -1105,7 +1105,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
 {
 }
 #endif
-void kvm_arch_irq_routing_update(struct kvm *kvm);
 
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
@@ -1114,6 +1113,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 #endif /* CONFIG_HAVE_KVM_EVENTFD */
 
+void kvm_arch_irq_routing_update(struct kvm *kvm);
+
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
        /*
@@ -1272,4 +1273,7 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
 
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+               unsigned long start, unsigned long end);
+
 #endif
index fbc98e2..e791ebc 100644 (file)
@@ -5,6 +5,7 @@
 
 #ifndef _LINUX_NOSPEC_H
 #define _LINUX_NOSPEC_H
+#include <asm/barrier.h>
 
 /**
  * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
@@ -30,26 +31,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #endif
 
 /*
- * Warn developers about inappropriate array_index_nospec() usage.
- *
- * Even if the CPU speculates past the WARN_ONCE branch, the
- * sign bit of @index is taken into account when generating the
- * mask.
- *
- * This warning is compiled out when the compiler can infer that
- * @index and @size are less than LONG_MAX.
- */
-#define array_index_mask_nospec_check(index, size)                             \
-({                                                                             \
-       if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,                      \
-           "array_index_nospec() limited to range of [0, LONG_MAX]\n"))        \
-               _mask = 0;                                                      \
-       else                                                                    \
-               _mask = array_index_mask_nospec(index, size);                   \
-       _mask;                                                                  \
-})
-
-/*
  * array_index_nospec - sanitize an array index after a bounds check
  *
  * For a code sequence like:
@@ -67,12 +48,11 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 ({                                                                     \
        typeof(index) _i = (index);                                     \
        typeof(size) _s = (size);                                       \
-       unsigned long _mask = array_index_mask_nospec_check(_i, _s);    \
+       unsigned long _mask = array_index_mask_nospec(_i, _s);          \
                                                                        \
        BUILD_BUG_ON(sizeof(_i) > sizeof(long));                        \
        BUILD_BUG_ON(sizeof(_s) > sizeof(long));                        \
                                                                        \
-       _i &= _mask;                                                    \
-       _i;                                                             \
+       (typeof(_i)) (_i & _mask);                                      \
 })
 #endif /* _LINUX_NOSPEC_H */
index c2d1b15..a91f251 100644 (file)
@@ -15,6 +15,7 @@
 
 #define ARC_REG_MCIP_BCR       0x0d0
 #define ARC_REG_MCIP_IDU_BCR   0x0D5
+#define ARC_REG_GFRC_BUILD     0x0D6
 #define ARC_REG_MCIP_CMD       0x600
 #define ARC_REG_MCIP_WDATA     0x601
 #define ARC_REG_MCIP_READBACK  0x602
@@ -36,10 +37,14 @@ struct mcip_cmd {
 #define CMD_SEMA_RELEASE               0x12
 
 #define CMD_DEBUG_SET_MASK             0x34
+#define CMD_DEBUG_READ_MASK            0x35
 #define CMD_DEBUG_SET_SELECT           0x36
+#define CMD_DEBUG_READ_SELECT          0x37
 
 #define CMD_GFRC_READ_LO               0x42
 #define CMD_GFRC_READ_HI               0x43
+#define CMD_GFRC_SET_CORE              0x47
+#define CMD_GFRC_READ_CORE             0x48
 
 #define CMD_IDU_ENABLE                 0x71
 #define CMD_IDU_DISABLE                        0x72
index 91a31ff..9a781f0 100644 (file)
@@ -63,6 +63,7 @@ struct drm_virtgpu_execbuffer {
 };
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
+#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */
 
 struct drm_virtgpu_getparam {
        __u64 param;
index 3d77fe9..9008f31 100644 (file)
@@ -42,7 +42,7 @@ typedef enum {
        SEV_RET_INVALID_PLATFORM_STATE,
        SEV_RET_INVALID_GUEST_STATE,
        SEV_RET_INAVLID_CONFIG,
-       SEV_RET_INVALID_len,
+       SEV_RET_INVALID_LEN,
        SEV_RET_ALREADY_OWNED,
        SEV_RET_INVALID_CERTIFICATE,
        SEV_RET_POLICY_FAILURE,
index a8100b9..969eaf1 100644 (file)
@@ -89,6 +89,7 @@
 #include <linux/io.h>
 #include <linux/cache.h>
 #include <linux/rodata_test.h>
+#include <linux/jump_label.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -1000,6 +1001,7 @@ static int __ref kernel_init(void *unused)
        /* need to finish all async __init code before freeing the memory */
        async_synchronize_full();
        ftrace_free_init_mem();
+       jump_label_invalidate_init();
        free_initmem();
        mark_readonly();
        system_state = SYSTEM_RUNNING;
index a17fdb6..6a5b61e 100644 (file)
@@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
        return e;
 }
 
-static inline int init_kernel_text(unsigned long addr)
+int init_kernel_text(unsigned long addr)
 {
        if (addr >= (unsigned long)_sinittext &&
            addr < (unsigned long)_einittext)
index b451709..52a0a7a 100644 (file)
@@ -366,12 +366,15 @@ static void __jump_label_update(struct static_key *key,
 {
        for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
                /*
-                * entry->code set to 0 invalidates module init text sections
-                * kernel_text_address() verifies we are not in core kernel
-                * init code, see jump_label_invalidate_module_init().
+                * An entry->code of 0 indicates an entry which has been
+                * disabled because it was in an init text area.
                 */
-               if (entry->code && kernel_text_address(entry->code))
-                       arch_jump_label_transform(entry, jump_label_type(entry));
+               if (entry->code) {
+                       if (kernel_text_address(entry->code))
+                               arch_jump_label_transform(entry, jump_label_type(entry));
+                       else
+                               WARN_ONCE(1, "can't patch jump_label at %pS", (void *)entry->code);
+               }
        }
 }
 
@@ -417,6 +420,19 @@ void __init jump_label_init(void)
        cpus_read_unlock();
 }
 
+/* Disable any jump label entries in __init code */
+void __init jump_label_invalidate_init(void)
+{
+       struct jump_entry *iter_start = __start___jump_table;
+       struct jump_entry *iter_stop = __stop___jump_table;
+       struct jump_entry *iter;
+
+       for (iter = iter_start; iter < iter_stop; iter++) {
+               if (init_kernel_text(iter->code))
+                       iter->code = 0;
+       }
+}
+
 #ifdef CONFIG_MODULES
 
 static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
@@ -633,6 +649,7 @@ static void jump_label_del_module(struct module *mod)
        }
 }
 
+/* Disable any jump label entries in module init code */
 static void jump_label_invalidate_module_init(struct module *mod)
 {
        struct jump_entry *iter_start = mod->jump_entries;
index fc11235..f274fbe 100644 (file)
@@ -2397,7 +2397,7 @@ skip:
 
                if (console_lock_spinning_disable_and_check()) {
                        printk_safe_exit_irqrestore(flags);
-                       return;
+                       goto out;
                }
 
                printk_safe_exit_irqrestore(flags);
@@ -2430,6 +2430,7 @@ skip:
        if (retry && console_trylock())
                goto again;
 
+out:
        if (wake_klogd)
                wake_up_klogd();
 }
index 1b34d21..7f5cdc1 100644 (file)
@@ -1491,12 +1491,12 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
        if (unlikely(virt == NULL))
                return;
 
-       entry = dma_entry_alloc();
-       if (!entry)
+       /* handle vmalloc and linear addresses */
+       if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
                return;
 
-       /* handle vmalloc and linear addresses */
-       if (!is_vmalloc_addr(virt) && !virt_to_page(virt))
+       entry = dma_entry_alloc();
+       if (!entry)
                return;
 
        entry->type      = dma_debug_coherent;
@@ -1528,7 +1528,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
        };
 
        /* handle vmalloc and linear addresses */
-       if (!is_vmalloc_addr(virt) && !virt_to_page(virt))
+       if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
                return;
 
        if (is_vmalloc_addr(virt))
index 99ec5bc..823b813 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -36,8 +36,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
 {
        struct radix_tree_iter iter;
        void __rcu **slot;
-       int base = idr->idr_base;
-       int id = *nextid;
+       unsigned int base = idr->idr_base;
+       unsigned int id = *nextid;
 
        if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
                return -EINVAL;
@@ -204,10 +204,11 @@ int idr_for_each(const struct idr *idr,
 
        radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
                int ret;
+               unsigned long id = iter.index + base;
 
-               if (WARN_ON_ONCE(iter.index > INT_MAX))
+               if (WARN_ON_ONCE(id > INT_MAX))
                        break;
-               ret = fn(iter.index + base, rcu_dereference_raw(*slot), data);
+               ret = fn(id, rcu_dereference_raw(*slot), data);
                if (ret)
                        return ret;
        }
@@ -230,8 +231,8 @@ void *idr_get_next(struct idr *idr, int *nextid)
 {
        struct radix_tree_iter iter;
        void __rcu **slot;
-       int base = idr->idr_base;
-       int id = *nextid;
+       unsigned long base = idr->idr_base;
+       unsigned long id = *nextid;
 
        id = (id < base) ? 0 : id - base;
        slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
index 0e349b8..ba942e3 100644 (file)
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
 hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct
 
 HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
@@ -16,7 +17,6 @@ HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
 bpf-direct-objs := bpf-direct.o
 
 # Try to match the kernel target.
-ifndef CROSS_COMPILE
 ifndef CONFIG_64BIT
 
 # s390 has -m31 flag to build 31 bit binaries
@@ -35,12 +35,4 @@ HOSTLOADLIBES_bpf-fancy += $(MFLAG)
 HOSTLOADLIBES_dropper += $(MFLAG)
 endif
 always := $(hostprogs-m)
-else
-# MIPS system calls are defined based on the -mabi that is passed
-# to the toolchain which may or may not be a valid option
-# for the host toolchain. So disable tests if target architecture
-# is MIPS but the host isn't.
-ifndef CONFIG_MIPS
-always := $(hostprogs-m)
-endif
 endif
index 47cddf3..4f2b25d 100644 (file)
@@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool
 
 objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
 
+objtool_args += $(if $(part-of-module), --module,)
+
 ifndef CONFIG_FRAME_POINTER
 objtool_args += --no-fp
 endif
@@ -264,6 +266,12 @@ objtool_args += --no-unreachable
 else
 objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
 endif
+ifdef CONFIG_RETPOLINE
+ifneq ($(RETPOLINE_CFLAGS),)
+  objtool_args += --retpoline
+endif
+endif
+
 
 ifdef CONFIG_MODVERSIONS
 objtool_o = $(@D)/.tmp_$(@F)
index 0b3026d..8a77620 100644 (file)
@@ -889,7 +889,7 @@ static int snd_ctl_elem_read(struct snd_card *card,
 
        index_offset = snd_ctl_get_ioff(kctl, &control->id);
        vd = &kctl->vd[index_offset];
-       if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) && kctl->get == NULL)
+       if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL)
                return -EPERM;
 
        snd_ctl_build_ioff(&control->id, kctl, index_offset);
index c71dcac..96143df 100644 (file)
@@ -181,7 +181,7 @@ static const struct kernel_param_ops param_ops_xint = {
 };
 #define param_check_xint param_check_int
 
-static int power_save = CONFIG_SND_HDA_POWER_SAVE_DEFAULT;
+static int power_save = -1;
 module_param(power_save, xint, 0644);
 MODULE_PARM_DESC(power_save, "Automatic power-saving timeout "
                 "(in second, 0 = disable).");
@@ -2186,6 +2186,24 @@ out_free:
        return err;
 }
 
+#ifdef CONFIG_PM
+/* On some boards setting power_save to a non 0 value leads to clicking /
+ * popping sounds whenever we enter/leave powersaving mode. Ideally we would
+ * figure out how to avoid these sounds, but that is not always feasible.
+ * So we keep a list of devices where we disable powersaving as it's known
+ * to cause problems on these devices.
+ */
+static struct snd_pci_quirk power_save_blacklist[] = {
+       /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+       SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
+       /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+       SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
+       /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */
+       SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0),
+       {}
+};
+#endif /* CONFIG_PM */
+
 /* number of codec slots for each chipset: 0 = default slots (i.e. 4) */
 static unsigned int azx_max_codecs[AZX_NUM_DRIVERS] = {
        [AZX_DRIVER_NVIDIA] = 8,
@@ -2198,6 +2216,7 @@ static int azx_probe_continue(struct azx *chip)
        struct hdac_bus *bus = azx_bus(chip);
        struct pci_dev *pci = chip->pci;
        int dev = chip->dev_index;
+       int val;
        int err;
 
        hda->probe_continued = 1;
@@ -2278,7 +2297,22 @@ static int azx_probe_continue(struct azx *chip)
 
        chip->running = 1;
        azx_add_card_list(chip);
-       snd_hda_set_power_save(&chip->bus, power_save * 1000);
+
+       val = power_save;
+#ifdef CONFIG_PM
+       if (val == -1) {
+               const struct snd_pci_quirk *q;
+
+               val = CONFIG_SND_HDA_POWER_SAVE_DEFAULT;
+               q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist);
+               if (q && val) {
+                       dev_info(chip->card->dev, "device %04x:%04x is on the power_save blacklist, forcing power_save to 0\n",
+                                q->subvendor, q->subdevice);
+                       val = 0;
+               }
+       }
+#endif /* CONFIG_PM */
+       snd_hda_set_power_save(&chip->bus, val * 1000);
        if (azx_has_pm_runtime(chip) || hda->use_vga_switcheroo)
                pm_runtime_put_autosuspend(&pci->dev);
 
index ce28f7c..b9c93fa 100644 (file)
@@ -4997,13 +4997,14 @@ static void alc_fixup_tpt470_dock(struct hda_codec *codec,
 
        if (action == HDA_FIXUP_ACT_PRE_PROBE) {
                spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
+               snd_hda_apply_pincfgs(codec, pincfgs);
+       } else if (action == HDA_FIXUP_ACT_INIT) {
                /* Enable DOCK device */
                snd_hda_codec_write(codec, 0x17, 0,
                            AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
                /* Enable DOCK device */
                snd_hda_codec_write(codec, 0x19, 0,
                            AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0);
-               snd_hda_apply_pincfgs(codec, pincfgs);
        }
 }
 
index 5025204..754e632 100644 (file)
@@ -3325,4 +3325,51 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"),
        }
 },
 
+{
+       /*
+        * Bowers & Wilkins PX headphones only support the 48 kHz sample rate
+        * even though they advertise more. The capture interface doesn't work
+        * even on Windows.
+        */
+       USB_DEVICE(0x19b5, 0x0021),
+       .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+               .ifnum = QUIRK_ANY_INTERFACE,
+               .type = QUIRK_COMPOSITE,
+               .data = (const struct snd_usb_audio_quirk[]) {
+                       {
+                               .ifnum = 0,
+                               .type = QUIRK_AUDIO_STANDARD_MIXER,
+                       },
+                       /* Capture */
+                       {
+                               .ifnum = 1,
+                               .type = QUIRK_IGNORE_INTERFACE,
+                       },
+                       /* Playback */
+                       {
+                               .ifnum = 2,
+                               .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+                               .data = &(const struct audioformat) {
+                                       .formats = SNDRV_PCM_FMTBIT_S16_LE,
+                                       .channels = 2,
+                                       .iface = 2,
+                                       .altsetting = 1,
+                                       .altset_idx = 1,
+                                       .attributes = UAC_EP_CS_ATTR_FILL_MAX |
+                                               UAC_EP_CS_ATTR_SAMPLE_RATE,
+                                       .endpoint = 0x03,
+                                       .ep_attr = USB_ENDPOINT_XFER_ISOC,
+                                       .rates = SNDRV_PCM_RATE_48000,
+                                       .rate_min = 48000,
+                                       .rate_max = 48000,
+                                       .nr_rates = 1,
+                                       .rate_table = (unsigned int[]) {
+                                               48000
+                                       }
+                               }
+                       },
+               }
+       }
+},
+
 #undef USB_DEVICE_VENDOR_SPEC
index a095150..4ed9d0c 100644 (file)
@@ -50,6 +50,7 @@
 /*standard module options for ALSA. This module supports only one card*/
 static int hdmi_card_index = SNDRV_DEFAULT_IDX1;
 static char *hdmi_card_id = SNDRV_DEFAULT_STR1;
+static bool single_port;
 
 module_param_named(index, hdmi_card_index, int, 0444);
 MODULE_PARM_DESC(index,
@@ -57,6 +58,9 @@ MODULE_PARM_DESC(index,
 module_param_named(id, hdmi_card_id, charp, 0444);
 MODULE_PARM_DESC(id,
                "ID string for INTEL Intel HDMI Audio controller.");
+module_param(single_port, bool, 0444);
+MODULE_PARM_DESC(single_port,
+               "Single-port mode (for compatibility)");
 
 /*
  * ELD SA bits in the CEA Speaker Allocation data block
@@ -1579,7 +1583,11 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id)
 static void notify_audio_lpe(struct platform_device *pdev, int port)
 {
        struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev);
-       struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
+       struct snd_intelhad *ctx;
+
+       ctx = &card_ctx->pcm_ctx[single_port ? 0 : port];
+       if (single_port)
+               ctx->port = port;
 
        schedule_work(&ctx->hdmi_audio_wq);
 }
@@ -1743,6 +1751,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 {
        struct snd_card *card;
        struct snd_intelhad_card *card_ctx;
+       struct snd_intelhad *ctx;
        struct snd_pcm *pcm;
        struct intel_hdmi_lpe_audio_pdata *pdata;
        int irq;
@@ -1787,6 +1796,21 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, card_ctx);
 
+       card_ctx->num_pipes = pdata->num_pipes;
+       card_ctx->num_ports = single_port ? 1 : pdata->num_ports;
+
+       for_each_port(card_ctx, port) {
+               ctx = &card_ctx->pcm_ctx[port];
+               ctx->card_ctx = card_ctx;
+               ctx->dev = card_ctx->dev;
+               ctx->port = single_port ? -1 : port;
+               ctx->pipe = -1;
+
+               spin_lock_init(&ctx->had_spinlock);
+               mutex_init(&ctx->mutex);
+               INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
+       }
+
        dev_dbg(&pdev->dev, "%s: mmio_start = 0x%x, mmio_end = 0x%x\n",
                __func__, (unsigned int)res_mmio->start,
                (unsigned int)res_mmio->end);
@@ -1816,19 +1840,12 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev)
        init_channel_allocations();
 
        card_ctx->num_pipes = pdata->num_pipes;
-       card_ctx->num_ports = pdata->num_ports;
+       card_ctx->num_ports = single_port ? 1 : pdata->num_ports;
 
        for_each_port(card_ctx, port) {
-               struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port];
                int i;
 
-               ctx->card_ctx = card_ctx;
-               ctx->dev = card_ctx->dev;
-               ctx->port = port;
-               ctx->pipe = -1;
-
-               INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq);
-
+               ctx = &card_ctx->pcm_ctx[port];
                ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS,
                                  MAX_CAP_STREAMS, &pcm);
                if (ret)
index a5684d0..5898c22 100755 (executable)
@@ -33,7 +33,7 @@ import resource
 import struct
 import re
 import subprocess
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 
 VMX_EXIT_REASONS = {
     'EXCEPTION_NMI':        0,
@@ -228,6 +228,7 @@ IOCTL_NUMBERS = {
 }
 
 ENCODING = locale.getpreferredencoding(False)
+TRACE_FILTER = re.compile(r'^[^\(]*$')
 
 
 class Arch(object):
@@ -260,6 +261,11 @@ class Arch(object):
                     return ArchX86(SVM_EXIT_REASONS)
                 return
 
+    def tracepoint_is_child(self, field):
+        if (TRACE_FILTER.match(field)):
+            return None
+        return field.split('(', 1)[0]
+
 
 class ArchX86(Arch):
     def __init__(self, exit_reasons):
@@ -267,6 +273,10 @@ class ArchX86(Arch):
         self.ioctl_numbers = IOCTL_NUMBERS
         self.exit_reasons = exit_reasons
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        return None
+
 
 class ArchPPC(Arch):
     def __init__(self):
@@ -282,6 +292,10 @@ class ArchPPC(Arch):
         self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
         self.exit_reasons = {}
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        return None
+
 
 class ArchA64(Arch):
     def __init__(self):
@@ -289,6 +303,10 @@ class ArchA64(Arch):
         self.ioctl_numbers = IOCTL_NUMBERS
         self.exit_reasons = AARCH64_EXIT_REASONS
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        return None
+
 
 class ArchS390(Arch):
     def __init__(self):
@@ -296,6 +314,12 @@ class ArchS390(Arch):
         self.ioctl_numbers = IOCTL_NUMBERS
         self.exit_reasons = None
 
+    def debugfs_is_child(self, field):
+        """ Returns name of parent if 'field' is a child, None otherwise """
+        if field.startswith('instruction_'):
+            return 'exit_instruction'
+
+
 ARCH = Arch.get_arch()
 
 
@@ -331,9 +355,6 @@ class perf_event_attr(ctypes.Structure):
 PERF_TYPE_TRACEPOINT = 2
 PERF_FORMAT_GROUP = 1 << 3
 
-PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
-PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
-
 
 class Group(object):
     """Represents a perf event group."""
@@ -376,8 +397,8 @@ class Event(object):
         self.syscall = self.libc.syscall
         self.name = name
         self.fd = None
-        self.setup_event(group, trace_cpu, trace_pid, trace_point,
-                         trace_filter, trace_set)
+        self._setup_event(group, trace_cpu, trace_pid, trace_point,
+                          trace_filter, trace_set)
 
     def __del__(self):
         """Closes the event's file descriptor.
@@ -390,7 +411,7 @@ class Event(object):
         if self.fd:
             os.close(self.fd)
 
-    def perf_event_open(self, attr, pid, cpu, group_fd, flags):
+    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
         """Wrapper for the sys_perf_evt_open() syscall.
 
         Used to set up performance events, returns a file descriptor or -1
@@ -409,7 +430,7 @@ class Event(object):
                             ctypes.c_int(pid), ctypes.c_int(cpu),
                             ctypes.c_int(group_fd), ctypes.c_long(flags))
 
-    def setup_event_attribute(self, trace_set, trace_point):
+    def _setup_event_attribute(self, trace_set, trace_point):
         """Returns an initialized ctype perf_event_attr struct."""
 
         id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
@@ -419,8 +440,8 @@ class Event(object):
         event_attr.config = int(open(id_path).read())
         return event_attr
 
-    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
-                    trace_filter, trace_set):
+    def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
+                     trace_filter, trace_set):
         """Sets up the perf event in Linux.
 
         Issues the syscall to register the event in the kernel and
@@ -428,7 +449,7 @@ class Event(object):
 
         """
 
-        event_attr = self.setup_event_attribute(trace_set, trace_point)
+        event_attr = self._setup_event_attribute(trace_set, trace_point)
 
         # First event will be group leader.
         group_leader = -1
@@ -437,8 +458,8 @@ class Event(object):
         if group.events:
             group_leader = group.events[0].fd
 
-        fd = self.perf_event_open(event_attr, trace_pid,
-                                  trace_cpu, group_leader, 0)
+        fd = self._perf_event_open(event_attr, trace_pid,
+                                   trace_cpu, group_leader, 0)
         if fd == -1:
             err = ctypes.get_errno()
             raise OSError(err, os.strerror(err),
@@ -475,6 +496,10 @@ class Event(object):
 
 class Provider(object):
     """Encapsulates functionalities used by all providers."""
+    def __init__(self, pid):
+        self.child_events = False
+        self.pid = pid
+
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
@@ -500,12 +525,12 @@ class TracepointProvider(Provider):
     """
     def __init__(self, pid, fields_filter):
         self.group_leaders = []
-        self.filters = self.get_filters()
+        self.filters = self._get_filters()
         self.update_fields(fields_filter)
-        self.pid = pid
+        super(TracepointProvider, self).__init__(pid)
 
     @staticmethod
-    def get_filters():
+    def _get_filters():
         """Returns a dict of trace events, their filter ids and
         the values that can be filtered.
 
@@ -521,8 +546,8 @@ class TracepointProvider(Provider):
             filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
         return filters
 
-    def get_available_fields(self):
-        """Returns a list of available event's of format 'event name(filter
+    def _get_available_fields(self):
+        """Returns a list of available events of format 'event name(filter
         name)'.
 
         All available events have directories under
@@ -549,11 +574,12 @@ class TracepointProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self.fields = [field for field in self.get_available_fields()
-                       if self.is_field_wanted(fields_filter, field)]
+        self.fields = [field for field in self._get_available_fields()
+                       if self.is_field_wanted(fields_filter, field) or
+                       ARCH.tracepoint_is_child(field)]
 
     @staticmethod
-    def get_online_cpus():
+    def _get_online_cpus():
         """Returns a list of cpu id integers."""
         def parse_int_list(list_string):
             """Returns an int list from a string of comma separated integers and
@@ -575,17 +601,17 @@ class TracepointProvider(Provider):
             cpu_string = cpu_list.readline()
             return parse_int_list(cpu_string)
 
-    def setup_traces(self):
+    def _setup_traces(self):
         """Creates all event and group objects needed to be able to retrieve
         data."""
-        fields = self.get_available_fields()
+        fields = self._get_available_fields()
         if self._pid > 0:
             # Fetch list of all threads of the monitored pid, as qemu
             # starts a thread for each vcpu.
             path = os.path.join('/proc', str(self._pid), 'task')
             groupids = self.walkdir(path)[1]
         else:
-            groupids = self.get_online_cpus()
+            groupids = self._get_online_cpus()
 
         # The constant is needed as a buffer for python libs, std
         # streams and other files that the script opens.
@@ -663,7 +689,7 @@ class TracepointProvider(Provider):
         # The garbage collector will get rid of all Event/Group
         # objects and open files after removing the references.
         self.group_leaders = []
-        self.setup_traces()
+        self._setup_traces()
         self.fields = self._fields
 
     def read(self, by_guest=0):
@@ -671,8 +697,12 @@ class TracepointProvider(Provider):
         ret = defaultdict(int)
         for group in self.group_leaders:
             for name, val in group.read().items():
-                if name in self._fields:
-                    ret[name] += val
+                if name not in self._fields:
+                    continue
+                parent = ARCH.tracepoint_is_child(name)
+                if parent:
+                    name += ' ' + parent
+                ret[name] += val
         return ret
 
     def reset(self):
@@ -690,11 +720,11 @@ class DebugfsProvider(Provider):
         self._baseline = {}
         self.do_read = True
         self.paths = []
-        self.pid = pid
+        super(DebugfsProvider, self).__init__(pid)
         if include_past:
-            self.restore()
+            self._restore()
 
-    def get_available_fields(self):
+    def _get_available_fields(self):
         """"Returns a list of available fields.
 
         The fields are all available KVM debugfs files
@@ -704,8 +734,9 @@ class DebugfsProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self._fields = [field for field in self.get_available_fields()
-                        if self.is_field_wanted(fields_filter, field)]
+        self._fields = [field for field in self._get_available_fields()
+                        if self.is_field_wanted(fields_filter, field) or
+                        ARCH.debugfs_is_child(field)]
 
     @property
     def fields(self):
@@ -758,7 +789,7 @@ class DebugfsProvider(Provider):
                     paths.append(dir)
         for path in paths:
             for field in self._fields:
-                value = self.read_field(field, path)
+                value = self._read_field(field, path)
                 key = path + field
                 if reset == 1:
                     self._baseline[key] = value
@@ -766,20 +797,21 @@ class DebugfsProvider(Provider):
                     self._baseline[key] = 0
                 if self._baseline.get(key, -1) == -1:
                     self._baseline[key] = value
-                increment = (results.get(field, 0) + value -
-                             self._baseline.get(key, 0))
-                if by_guest:
-                    pid = key.split('-')[0]
-                    if pid in results:
-                        results[pid] += increment
-                    else:
-                        results[pid] = increment
+                parent = ARCH.debugfs_is_child(field)
+                if parent:
+                    field = field + ' ' + parent
+                else:
+                    if by_guest:
+                        field = key.split('-')[0]    # set 'field' to 'pid'
+                increment = value - self._baseline.get(key, 0)
+                if field in results:
+                    results[field] += increment
                 else:
                     results[field] = increment
 
         return results
 
-    def read_field(self, field, path):
+    def _read_field(self, field, path):
         """Returns the value of a single field from a specific VM."""
         try:
             return int(open(os.path.join(PATH_DEBUGFS_KVM,
@@ -794,12 +826,15 @@ class DebugfsProvider(Provider):
         self._baseline = {}
         self.read(1)
 
-    def restore(self):
+    def _restore(self):
         """Reset field counters"""
         self._baseline = {}
         self.read(2)
 
 
+EventStat = namedtuple('EventStat', ['value', 'delta'])
+
+
 class Stats(object):
     """Manages the data providers and the data they provide.
 
@@ -808,13 +843,13 @@ class Stats(object):
 
     """
     def __init__(self, options):
-        self.providers = self.get_providers(options)
+        self.providers = self._get_providers(options)
         self._pid_filter = options.pid
         self._fields_filter = options.fields
         self.values = {}
+        self._child_events = False
 
-    @staticmethod
-    def get_providers(options):
+    def _get_providers(self, options):
         """Returns a list of data providers depending on the passed options."""
         providers = []
 
@@ -826,7 +861,7 @@ class Stats(object):
 
         return providers
 
-    def update_provider_filters(self):
+    def _update_provider_filters(self):
         """Propagates fields filters to providers."""
         # As we reset the counters when updating the fields we can
         # also clear the cache of old values.
@@ -847,7 +882,7 @@ class Stats(object):
     def fields_filter(self, fields_filter):
         if fields_filter != self._fields_filter:
             self._fields_filter = fields_filter
-            self.update_provider_filters()
+            self._update_provider_filters()
 
     @property
     def pid_filter(self):
@@ -861,16 +896,33 @@ class Stats(object):
             for provider in self.providers:
                 provider.pid = self._pid_filter
 
+    @property
+    def child_events(self):
+        return self._child_events
+
+    @child_events.setter
+    def child_events(self, val):
+        self._child_events = val
+        for provider in self.providers:
+            provider.child_events = val
+
     def get(self, by_guest=0):
         """Returns a dict with field -> (value, delta to last value) of all
-        provider data."""
+        provider data.
+        Key formats:
+          * plain: 'key' is event name
+          * child-parent: 'key' is in format '<child> <parent>'
+          * pid: 'key' is the pid of the guest, and the record contains the
+               aggregated event data
+        These formats are generated by the providers, and handled in class TUI.
+        """
         for provider in self.providers:
             new = provider.read(by_guest=by_guest)
-            for key in new if by_guest else provider.fields:
-                oldval = self.values.get(key, (0, 0))[0]
+            for key in new:
+                oldval = self.values.get(key, EventStat(0, 0)).value
                 newval = new.get(key, 0)
                 newdelta = newval - oldval
-                self.values[key] = (newval, newdelta)
+                self.values[key] = EventStat(newval, newdelta)
         return self.values
 
     def toggle_display_guests(self, to_pid):
@@ -899,10 +951,10 @@ class Stats(object):
         self.get(to_pid)
         return 0
 
+
 DELAY_DEFAULT = 3.0
 MAX_GUEST_NAME_LEN = 48
 MAX_REGEX_LEN = 44
-DEFAULT_REGEX = r'^[^\(]*$'
 SORT_DEFAULT = 0
 
 
@@ -969,7 +1021,7 @@ class Tui(object):
 
         return res
 
-    def print_all_gnames(self, row):
+    def _print_all_gnames(self, row):
         """Print a list of all running guests along with their pids."""
         self.screen.addstr(row, 2, '%8s  %-60s' %
                            ('Pid', 'Guest Name (fuzzy list, might be '
@@ -1032,19 +1084,13 @@ class Tui(object):
 
         return name
 
-    def update_drilldown(self):
-        """Sets or removes a filter that only allows fields without braces."""
-        if not self.stats.fields_filter:
-            self.stats.fields_filter = DEFAULT_REGEX
-
-        elif self.stats.fields_filter == DEFAULT_REGEX:
-            self.stats.fields_filter = None
-
-    def update_pid(self, pid):
+    def _update_pid(self, pid):
         """Propagates pid selection to stats object."""
+        self.screen.addstr(4, 1, 'Updating pid filter...')
+        self.screen.refresh()
         self.stats.pid_filter = pid
 
-    def refresh_header(self, pid=None):
+    def _refresh_header(self, pid=None):
         """Refreshes the header."""
         if pid is None:
             pid = self.stats.pid_filter
@@ -1059,8 +1105,7 @@ class Tui(object):
                                .format(pid, gname), curses.A_BOLD)
         else:
             self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
-        if self.stats.fields_filter and self.stats.fields_filter \
-           != DEFAULT_REGEX:
+        if self.stats.fields_filter:
             regex = self.stats.fields_filter
             if len(regex) > MAX_REGEX_LEN:
                 regex = regex[:MAX_REGEX_LEN] + '...'
@@ -1075,56 +1120,99 @@ class Tui(object):
         self.screen.addstr(4, 1, 'Collecting data...')
         self.screen.refresh()
 
-    def refresh_body(self, sleeptime):
+    def _refresh_body(self, sleeptime):
+        def is_child_field(field):
+            return field.find('(') != -1
+
+        def insert_child(sorted_items, child, values, parent):
+            num = len(sorted_items)
+            for i in range(0, num):
+                # only add child if parent is present
+                if parent.startswith(sorted_items[i][0]):
+                    sorted_items.insert(i + 1, ('  ' + child, values))
+
+        def get_sorted_events(self, stats):
+            """ separate parent and child events """
+            if self._sorting == SORT_DEFAULT:
+                def sortkey((_k, v)):
+                    # sort by (delta value, overall value)
+                    return (v.delta, v.value)
+            else:
+                def sortkey((_k, v)):
+                    # sort by overall value
+                    return v.value
+
+            childs = []
+            sorted_items = []
+            # we can't rule out child events to appear prior to parents even
+            # when sorted - separate out all children first, and add in later
+            for key, values in sorted(stats.items(), key=sortkey,
+                                      reverse=True):
+                if values == (0, 0):
+                    continue
+                if key.find(' ') != -1:
+                    if not self.stats.child_events:
+                        continue
+                    childs.insert(0, (key, values))
+                else:
+                    sorted_items.append((key, values))
+            if self.stats.child_events:
+                for key, values in childs:
+                    (child, parent) = key.split(' ')
+                    insert_child(sorted_items, child, values, parent)
+
+            return sorted_items
+
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
         stats = self.stats.get(self._display_guests)
-
-        def sortCurAvg(x):
-            # sort by current events if available
-            if stats[x][1]:
-                return (-stats[x][1], -stats[x][0])
+        total = 0.
+        ctotal = 0.
+        for key, values in stats.items():
+            if self._display_guests:
+                if self.get_gname_from_pid(key):
+                    total += values.value
+                continue
+            if not key.find(' ') != -1:
+                total += values.value
             else:
-                return (0, -stats[x][0])
+                ctotal += values.value
+        if total == 0.:
+            # we don't have any fields, or all non-child events are filtered
+            total = ctotal
 
-        def sortTotal(x):
-            # sort by totals
-            return (0, -stats[x][0])
-        total = 0.
-        for key in stats.keys():
-            if key.find('(') is -1:
-                total += stats[key][0]
-        if self._sorting == SORT_DEFAULT:
-            sortkey = sortCurAvg
-        else:
-            sortkey = sortTotal
+        # print events
         tavg = 0
-        for key in sorted(stats.keys(), key=sortkey):
-            if row >= self.screen.getmaxyx()[0] - 1:
-                break
-            values = stats[key]
-            if not values[0] and not values[1]:
+        tcur = 0
+        for key, values in get_sorted_events(self, stats):
+            if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                 break
-            if values[0] is not None:
-                cur = int(round(values[1] / sleeptime)) if values[1] else ''
-                if self._display_guests:
-                    key = self.get_gname_from_pid(key)
-                self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
-                                   (key, values[0], values[0] * 100 / total,
-                                    cur))
-                if cur is not '' and key.find('(') is -1:
-                    tavg += cur
+            if self._display_guests:
+                key = self.get_gname_from_pid(key)
+                if not key:
+                    continue
+            cur = int(round(values.delta / sleeptime)) if values.delta else ''
+            if key[0] != ' ':
+                if values.delta:
+                    tcur += values.delta
+                ptotal = values.value
+                ltotal = total
+            else:
+                ltotal = ptotal
+            self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
+                               values.value,
+                               values.value * 100 / float(ltotal), cur))
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
-        else:
+        if row > 4:
+            tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
             self.screen.addstr(row, 1, '%-40s %10d        %8s' %
-                               ('Total', total, tavg if tavg else ''),
-                               curses.A_BOLD)
+                               ('Total', total, tavg), curses.A_BOLD)
         self.screen.refresh()
 
-    def show_msg(self, text):
+    def _show_msg(self, text):
         """Display message centered text and exit on key press"""
         hint = 'Press any key to continue'
         curses.cbreak()
@@ -1139,16 +1227,16 @@ class Tui(object):
                            curses.A_STANDOUT)
         self.screen.getkey()
 
-    def show_help_interactive(self):
+    def _show_help_interactive(self):
         """Display help with list of interactive commands"""
         msg = ('   b     toggle events by guests (debugfs only, honors'
                ' filters)',
                '   c     clear filter',
                '   f     filter by regular expression',
-               '   g     filter by guest name',
+               '   g     filter by guest name/PID',
                '   h     display interactive commands reference',
                '   o     toggle sorting order (Total vs CurAvg/s)',
-               '   p     filter by PID',
+               '   p     filter by guest name/PID',
                '   q     quit',
                '   r     reset stats',
                '   s     set update interval',
@@ -1165,14 +1253,15 @@ class Tui(object):
             self.screen.addstr(row, 0, line)
             row += 1
         self.screen.getkey()
-        self.refresh_header()
+        self._refresh_header()
 
-    def show_filter_selection(self):
+    def _show_filter_selection(self):
         """Draws filter selection mask.
 
         Asks for a valid regex and sets the fields filter accordingly.
 
         """
+        msg = ''
         while True:
             self.screen.erase()
             self.screen.addstr(0, 0,
@@ -1181,61 +1270,25 @@ class Tui(object):
             self.screen.addstr(2, 0,
                                "Current regex: {0}"
                                .format(self.stats.fields_filter))
+            self.screen.addstr(5, 0, msg)
             self.screen.addstr(3, 0, "New regex: ")
             curses.echo()
             regex = self.screen.getstr().decode(ENCODING)
             curses.noecho()
             if len(regex) == 0:
-                self.stats.fields_filter = DEFAULT_REGEX
-                self.refresh_header()
+                self.stats.fields_filter = ''
+                self._refresh_header()
                 return
             try:
                 re.compile(regex)
                 self.stats.fields_filter = regex
-                self.refresh_header()
+                self._refresh_header()
                 return
             except re.error:
+                msg = '"' + regex + '": Not a valid regular expression'
                 continue
 
-    def show_vm_selection_by_pid(self):
-        """Draws PID selection mask.
-
-        Asks for a pid until a valid pid or 0 has been entered.
-
-        """
-        msg = ''
-        while True:
-            self.screen.erase()
-            self.screen.addstr(0, 0,
-                               'Show statistics for specific pid.',
-                               curses.A_BOLD)
-            self.screen.addstr(1, 0,
-                               'This might limit the shown data to the trace '
-                               'statistics.')
-            self.screen.addstr(5, 0, msg)
-            self.print_all_gnames(7)
-
-            curses.echo()
-            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
-            pid = self.screen.getstr().decode(ENCODING)
-            curses.noecho()
-
-            try:
-                if len(pid) > 0:
-                    pid = int(pid)
-                    if pid != 0 and not os.path.isdir(os.path.join('/proc/',
-                                                                   str(pid))):
-                        msg = '"' + str(pid) + '": Not a running process'
-                        continue
-                else:
-                    pid = 0
-                self.refresh_header(pid)
-                self.update_pid(pid)
-                break
-            except ValueError:
-                msg = '"' + str(pid) + '": Not a valid pid'
-
-    def show_set_update_interval(self):
+    def _show_set_update_interval(self):
         """Draws update interval selection mask."""
         msg = ''
         while True:
@@ -1265,60 +1318,67 @@ class Tui(object):
 
             except ValueError:
                 msg = '"' + str(val) + '": Invalid value'
-        self.refresh_header()
+        self._refresh_header()
 
-    def show_vm_selection_by_guest_name(self):
+    def _show_vm_selection_by_guest(self):
         """Draws guest selection mask.
 
-        Asks for a guest name until a valid guest name or '' is entered.
+        Prompts repeatedly until the input is empty or '0' (both select
+        pid 0), the pid of a running process, or a guest name that
+        get_pid_from_gname() resolves to exactly one pid. Afterwards
+        _refresh_header() and _update_pid() are called with that pid.
 
         """
         msg = ''
         while True:
             self.screen.erase()
             self.screen.addstr(0, 0,
-                               'Show statistics for specific guest.',
+                               'Show statistics for specific guest or pid.',
                                curses.A_BOLD)
             self.screen.addstr(1, 0,
                                'This might limit the shown data to the trace '
                                'statistics.')
             self.screen.addstr(5, 0, msg)
-            self.print_all_gnames(7)
+            self._print_all_gnames(7)
             curses.echo()
-            self.screen.addstr(3, 0, "Guest [ENTER or guest]: ")
-            gname = self.screen.getstr().decode(ENCODING)
+            curses.curs_set(1)
+            self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
+            guest = self.screen.getstr().decode(ENCODING)
             curses.noecho()
 
-            if not gname:
-                self.refresh_header(0)
-                self.update_pid(0)
+            # Reset on every pass so a rejected entry never leaks into the
+            # calls after the loop; empty input or '0' leaves it at 0.
+            pid = 0
+            if not guest or guest == '0':
                 break
-            else:
-                pids = []
-                try:
-                    pids = self.get_pid_from_gname(gname)
-                except:
-                    msg = '"' + gname + '": Internal error while searching, ' \
-                          'use pid filter instead'
-                    continue
-                if len(pids) == 0:
-                    msg = '"' + gname + '": Not an active guest'
+            # All-digit input is treated as a pid and must exist in /proc.
+            if guest.isdigit():
+                if not os.path.isdir(os.path.join('/proc/', guest)):
+                    msg = '"' + guest + '": Not a running process'
                     continue
-                if len(pids) > 1:
-                    msg = '"' + gname + '": Multiple matches found, use pid ' \
-                          'filter instead'
-                    continue
-                self.refresh_header(pids[0])
-                self.update_pid(pids[0])
+                pid = int(guest)
                 break
+            # Anything else is looked up as a guest name.
+            pids = []
+            try:
+                pids = self.get_pid_from_gname(guest)
+            except:
+                # NOTE(review): the bare except also catches
+                # KeyboardInterrupt/SystemExit - confirm this is intended.
+                msg = '"' + guest + '": Internal error while searching, ' \
+                      'use pid filter instead'
+                continue
+            if len(pids) == 0:
+                msg = '"' + guest + '": Not an active guest'
+                continue
+            if len(pids) > 1:
+                msg = '"' + guest + '": Multiple matches found, use pid ' \
+                      'filter instead'
+                continue
+            pid = pids[0]
+            break
+        curses.curs_set(0)
+        self._refresh_header(pid)
+        self._update_pid(pid)
 
     def show_stats(self):
         """Refreshes the screen and processes user input."""
         sleeptime = self._delay_initial
-        self.refresh_header()
+        self._refresh_header()
         start = 0.0  # result based on init value never appears on screen
         while True:
-            self.refresh_body(time.time() - start)
+            self._refresh_body(time.time() - start)
             curses.halfdelay(int(sleeptime * 10))
             start = time.time()
             sleeptime = self._delay_regular
@@ -1327,47 +1387,39 @@ class Tui(object):
                 if char == 'b':
                     self._display_guests = not self._display_guests
                     if self.stats.toggle_display_guests(self._display_guests):
-                        self.show_msg(['Command not available with tracepoints'
-                                       ' enabled', 'Restart with debugfs only '
-                                       '(see option \'-d\') and try again!'])
+                        self._show_msg(['Command not available with '
+                                        'tracepoints enabled', 'Restart with '
+                                        'debugfs only (see option \'-d\') and '
+                                        'try again!'])
                         self._display_guests = not self._display_guests
-                    self.refresh_header()
+                    self._refresh_header()
                 if char == 'c':
-                    self.stats.fields_filter = DEFAULT_REGEX
-                    self.refresh_header(0)
-                    self.update_pid(0)
+                    self.stats.fields_filter = ''
+                    self._refresh_header(0)
+                    self._update_pid(0)
                 if char == 'f':
                     curses.curs_set(1)
-                    self.show_filter_selection()
+                    self._show_filter_selection()
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
-                if char == 'g':
-                    curses.curs_set(1)
-                    self.show_vm_selection_by_guest_name()
-                    curses.curs_set(0)
+                if char == 'g' or char == 'p':
+                    self._show_vm_selection_by_guest()
                     sleeptime = self._delay_initial
                 if char == 'h':
-                    self.show_help_interactive()
+                    self._show_help_interactive()
                 if char == 'o':
                     self._sorting = not self._sorting
-                if char == 'p':
-                    curses.curs_set(1)
-                    self.show_vm_selection_by_pid()
-                    curses.curs_set(0)
-                    sleeptime = self._delay_initial
                 if char == 'q':
                     break
                 if char == 'r':
                     self.stats.reset()
                 if char == 's':
                     curses.curs_set(1)
-                    self.show_set_update_interval()
+                    self._show_set_update_interval()
                     curses.curs_set(0)
                     sleeptime = self._delay_initial
                 if char == 'x':
-                    self.update_drilldown()
-                    # prevents display of current values on next refresh
-                    self.stats.get(self._display_guests)
+                    self.stats.child_events = not self.stats.child_events
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1380,9 +1432,9 @@ def batch(stats):
         s = stats.get()
         time.sleep(1)
         s = stats.get()
-        for key in sorted(s.keys()):
-            values = s[key]
-            print('%-42s%10d%10d' % (key, values[0], values[1]))
+        for key, values in sorted(s.items()):
+            print('%-42s%10d%10d' % (key.split(' ')[0], values.value,
+                  values.delta))
     except KeyboardInterrupt:
         pass
 
@@ -1392,14 +1444,14 @@ def log(stats):
     keys = sorted(stats.get().keys())
 
     def banner():
-        for k in keys:
-            print(k, end=' ')
+        for key in keys:
+            print(key.split(' ')[0], end=' ')
         print()
 
     def statline():
         s = stats.get()
-        for k in keys:
-            print(' %9d' % s[k][1], end=' ')
+        for key in keys:
+            print(' %9d' % s[key].delta, end=' ')
         print()
     line = 0
     banner_repeat = 20
@@ -1504,7 +1556,7 @@ Press any other key to refresh statistics immediately.
                          )
     optparser.add_option('-f', '--fields',
                          action='store',
-                         default=DEFAULT_REGEX,
+                         default='',
                          dest='fields',
                          help='''fields to display (regex)
                                  "-f help" for a list of available events''',
@@ -1539,17 +1591,6 @@ Press any other key to refresh statistics immediately.
 
 def check_access(options):
     """Exits if the current user can't access all needed directories."""
-    if not os.path.exists('/sys/kernel/debug'):
-        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
-        sys.exit(1)
-
-    if not os.path.exists(PATH_DEBUGFS_KVM):
-        sys.stderr.write("Please make sure, that debugfs is mounted and "
-                         "readable by the current user:\n"
-                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
-                         "Also ensure, that the kvm modules are loaded.\n")
-        sys.exit(1)
-
     if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
                                                      not options.debugfs):
         sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
@@ -1567,7 +1608,33 @@ def check_access(options):
     return options
 
 
+def assign_globals():
+    """Locate debugfs and set the KVM/tracing path globals.
+
+    Scans /proc/mounts for a debugfs mount and derives PATH_DEBUGFS_KVM and
+    PATH_DEBUGFS_TRACING from its mount point. Exits with status 1 if no
+    debugfs mount is found or the kvm directory below it does not exist.
+
+    """
+    global PATH_DEBUGFS_KVM
+    global PATH_DEBUGFS_TRACING
+
+    debugfs = ''
+    # open() replaces the Python-2-only file() builtin; the context manager
+    # closes the handle even though the loop is left early.
+    with open('/proc/mounts') as mounts:
+        for line in mounts:
+            fields = line.split(' ')
+            # Match the device column (as before) or the filesystem type
+            # column, so "none /sys/kernel/debug debugfs ..." is found too.
+            if fields[0] == 'debugfs' or fields[2:3] == ['debugfs']:
+                debugfs = fields[1]
+                break
+    if debugfs == '':
+        sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
+                         "your kernel, mounted and\nreadable by the current "
+                         "user:\n"
+                         "('mount -t debugfs debugfs /sys/kernel/debug')\n")
+        sys.exit(1)
+
+    PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
+    PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')
+
+    if not os.path.exists(PATH_DEBUGFS_KVM):
+        sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
+                         "your kernel and that the modules are loaded.\n")
+        sys.exit(1)
+
+
 def main():
+    assign_globals()
     options = get_options()
     options = check_access(options)
 
index b5b3810..0811d86 100644 (file)
@@ -35,13 +35,13 @@ INTERACTIVE COMMANDS
 
 *f*::  filter by regular expression
 
-*g*::  filter by guest name
+*g*::  filter by guest name/PID
 
 *h*::  display interactive commands reference
 
 *o*::   toggle sorting order (Total vs CurAvg/s)
 
-*p*::  filter by PID
+*p*::  filter by guest name/PID
 
 *q*::  quit
 
index 57254f5..694abc6 100644 (file)
@@ -29,7 +29,7 @@
 #include "builtin.h"
 #include "check.h"
 
-bool no_fp, no_unreachable;
+bool no_fp, no_unreachable, retpoline, module;
 
 static const char * const check_usage[] = {
        "objtool check [<options>] file.o",
@@ -39,6 +39,8 @@ static const char * const check_usage[] = {
 const struct option check_options[] = {
        OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
        OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+       OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
+       OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
        OPT_END(),
 };
 
@@ -53,5 +55,5 @@ int cmd_check(int argc, const char **argv)
 
        objname = argv[0];
 
-       return check(objname, no_fp, no_unreachable, false);
+       return check(objname, false);
 }
index 91e8e19..77ea2b9 100644 (file)
@@ -25,7 +25,6 @@
  */
 
 #include <string.h>
-#include <subcmd/parse-options.h>
 #include "builtin.h"
 #include "check.h"
 
@@ -36,9 +35,6 @@ static const char *orc_usage[] = {
        NULL,
 };
 
-extern const struct option check_options[];
-extern bool no_fp, no_unreachable;
-
 int cmd_orc(int argc, const char **argv)
 {
        const char *objname;
@@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv)
 
                objname = argv[0];
 
-               return check(objname, no_fp, no_unreachable, true);
+               return check(objname, true);
        }
 
        if (!strcmp(argv[0], "dump")) {
index dd52606..28ff40e 100644 (file)
 #ifndef _BUILTIN_H
 #define _BUILTIN_H
 
+#include <subcmd/parse-options.h>
+
+extern const struct option check_options[];
+extern bool no_fp, no_unreachable, retpoline, module;
+
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
 
index a8cb69a..472e64e 100644 (file)
@@ -18,6 +18,7 @@
 #include <string.h>
 #include <stdlib.h>
 
+#include "builtin.h"
 #include "check.h"
 #include "elf.h"
 #include "special.h"
@@ -33,7 +34,6 @@ struct alternative {
 };
 
 const char *objname;
-static bool no_fp;
 struct cfi_state initial_func_cfi;
 
 struct instruction *find_insn(struct objtool_file *file,
@@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file)
                         * disguise, so convert them accordingly.
                         */
                        insn->type = INSN_JUMP_DYNAMIC;
+                       insn->retpoline_safe = true;
                        continue;
                } else {
                        /* sibling call */
@@ -548,7 +549,8 @@ static int add_call_destinations(struct objtool_file *file)
                        if (!insn->call_dest && !insn->ignore) {
                                WARN_FUNC("unsupported intra-function call",
                                          insn->sec, insn->offset);
-                               WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
+                               if (retpoline)
+                                       WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
                                return -1;
                        }
 
@@ -1108,6 +1110,54 @@ static int read_unwind_hints(struct objtool_file *file)
        return 0;
 }
 
+/*
+ * Mark every instruction referenced from the .discard.retpoline_safe
+ * section as retpoline_safe.
+ *
+ * The section is treated as an array of unsigned-long-sized entries; each
+ * entry must have a relocation that resolves to an indirect jump or call.
+ *
+ * Returns 0 on success (including when the section is absent) and -1 if
+ * the section is malformed or an entry cannot be resolved.
+ */
+static int read_retpoline_hints(struct objtool_file *file)
+{
+       struct section *sec, *relasec;
+       struct instruction *insn;
+       struct rela *rela;
+       int i;
+
+       sec = find_section_by_name(file->elf, ".discard.retpoline_safe");
+       if (!sec)
+               return 0;
+
+       relasec = sec->rela;
+       if (!relasec) {
+               WARN("missing .rela.discard.retpoline_safe section");
+               return -1;
+       }
+
+       if (sec->len % sizeof(unsigned long)) {
+               /* sizeof yields size_t, which must be printed with %zu */
+               WARN("retpoline_safe size mismatch: %d %zu", sec->len, sizeof(unsigned long));
+               return -1;
+       }
+
+       for (i = 0; i < sec->len / sizeof(unsigned long); i++) {
+               rela = find_rela_by_dest(sec, i * sizeof(unsigned long));
+               if (!rela) {
+                       WARN("can't find rela for retpoline_safe[%d]", i);
+                       return -1;
+               }
+
+               insn = find_insn(file, rela->sym->sec, rela->addend);
+               if (!insn) {
+                       WARN("can't find insn for retpoline_safe[%d]", i);
+                       return -1;
+               }
+
+               /* Only indirect branches may carry the annotation. */
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+                   insn->type != INSN_CALL_DYNAMIC) {
+                       WARN_FUNC("retpoline_safe hint not a indirect jump/call",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+
+               insn->retpoline_safe = true;
+       }
+
+       return 0;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
        int ret;
@@ -1146,6 +1196,10 @@ static int decode_sections(struct objtool_file *file)
        if (ret)
                return ret;
 
+       ret = read_retpoline_hints(file);
+       if (ret)
+               return ret;
+
        return 0;
 }
 
@@ -1891,6 +1945,38 @@ static int validate_unwind_hints(struct objtool_file *file)
        return warnings;
 }
 
+/*
+ * Warn about every indirect jump or call that is not marked retpoline_safe.
+ *
+ * Instructions in .init.text are exempt unless the object is part of a
+ * module (the "module" option). Returns the number of warnings issued.
+ */
+static int validate_retpoline(struct objtool_file *file)
+{
+       struct instruction *insn;
+       int warnings = 0;
+
+       for_each_insn(file, insn) {
+               if (insn->type != INSN_JUMP_DYNAMIC &&
+                   insn->type != INSN_CALL_DYNAMIC)
+                       continue;
+
+               if (insn->retpoline_safe)
+                       continue;
+
+               /*
+                * .init.text code is run before userspace and thus doesn't
+                * strictly need retpolines. Modules are the exception: they
+                * are loaded late, so their .init.text very much does need
+                * retpolines.
+                */
+               if (!strcmp(insn->sec->name, ".init.text") && !module)
+                       continue;
+
+               WARN_FUNC("indirect %s found in RETPOLINE build",
+                         insn->sec, insn->offset,
+                         insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+
+               warnings++;
+       }
+
+       return warnings;
+}
+
 static bool is_kasan_insn(struct instruction *insn)
 {
        return (insn->type == INSN_CALL &&
@@ -2022,13 +2108,12 @@ static void cleanup(struct objtool_file *file)
        elf_close(file->elf);
 }
 
-int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
+int check(const char *_objname, bool orc)
 {
        struct objtool_file file;
        int ret, warnings = 0;
 
        objname = _objname;
-       no_fp = _no_fp;
 
        file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
        if (!file.elf)
@@ -2052,6 +2137,13 @@ int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc)
        if (list_empty(&file.insn_list))
                goto out;
 
+       if (retpoline) {
+               ret = validate_retpoline(&file);
+               if (ret < 0)
+                       return ret;
+               warnings += ret;
+       }
+
        ret = validate_functions(&file);
        if (ret < 0)
                goto out;
index 23a1d06..c6b68fc 100644 (file)
@@ -45,6 +45,7 @@ struct instruction {
        unsigned char type;
        unsigned long immediate;
        bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
+       bool retpoline_safe;
        struct symbol *call_dest;
        struct instruction *jump_dest;
        struct instruction *first_jump_src;
@@ -63,7 +64,7 @@ struct objtool_file {
        bool ignore_unreachables, c_file, hints;
 };
 
-int check(const char *objname, bool no_fp, bool no_unreachable, bool orc);
+int check(const char *objname, bool orc);
 
 struct instruction *find_insn(struct objtool_file *file,
                              struct section *sec, unsigned long offset);
index 44ef9eb..6c645eb 100644 (file)
@@ -178,6 +178,55 @@ void idr_get_next_test(int base)
        idr_destroy(&idr);
 }
 
+/*
+ * Callback handed to idr_for_each() by idr_u32_test1(): asserts that the
+ * visited id is non-negative and maps to DUMMY_PTR. @data is unused.
+ * Always returns 0.
+ */
+int idr_u32_cb(int id, void *ptr, void *data)
+{
+       BUG_ON(id < 0);
+       BUG_ON(ptr != DUMMY_PTR);
+       return 0;
+}
+
+/*
+ * Exercise idr_alloc_u32() for a single id, @handle, on an empty @idr.
+ *
+ * Allocates DUMMY_PTR at @handle, checks that a second allocation of the
+ * same id fails with -ENOSPC, checks what idr_get_next() and idr_for_each()
+ * report, then removes the entry and expects the IDR to be empty again.
+ */
+void idr_u32_test1(struct idr *idr, u32 handle)
+{
+       /* Ensures the "ignore these warnings" banner is printed only once. */
+       static bool warned = false;
+       u32 id = handle;
+       int sid = 0;
+       void *ptr;
+
+       /* @handle is passed both as the starting id and as the maximum. */
+       BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL));
+       BUG_ON(id != handle);
+       /* The id is taken now, so the same request must report -ENOSPC. */
+       BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL) != -ENOSPC);
+       BUG_ON(id != handle);
+       if (!warned && id > INT_MAX)
+               printk("vvv Ignore these warnings\n");
+       ptr = idr_get_next(idr, &sid);
+       if (id > INT_MAX) {
+               /* Ids above INT_MAX are expected not to be returned here. */
+               BUG_ON(ptr != NULL);
+               BUG_ON(sid != 0);
+       } else {
+               BUG_ON(ptr != DUMMY_PTR);
+               BUG_ON(sid != id);
+       }
+       idr_for_each(idr, idr_u32_cb, NULL);
+       if (!warned && id > INT_MAX) {
+               printk("^^^ Warnings over\n");
+               warned = true;
+       }
+       BUG_ON(idr_remove(idr, id) != DUMMY_PTR);
+       BUG_ON(!idr_is_empty(idr));
+}
+
+/*
+ * Run idr_u32_test1() on an IDR initialised with @base for a small id, for
+ * ids on either side of the INT_MAX boundary, and for ids near and at the
+ * top of the u32 range.
+ */
+void idr_u32_test(int base)
+{
+       DEFINE_IDR(idr);
+       idr_init_base(&idr, base);
+       idr_u32_test1(&idr, 10);
+       idr_u32_test1(&idr, 0x7fffffff);
+       idr_u32_test1(&idr, 0x80000000);
+       idr_u32_test1(&idr, 0x80000001);
+       idr_u32_test1(&idr, 0xffe00000);
+       idr_u32_test1(&idr, 0xffffffff);
+}
+
 void idr_checks(void)
 {
        unsigned long i;
@@ -248,6 +297,9 @@ void idr_checks(void)
        idr_get_next_test(0);
        idr_get_next_test(1);
        idr_get_next_test(4);
+       idr_u32_test(4);
+       idr_u32_test(1);
+       idr_u32_test(0);
 }
 
 /*
index 6903ccf..44a0d1a 100644 (file)
@@ -29,7 +29,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
 {
        struct radix_tree_node *node;
 
-       if (flags & __GFP_NOWARN)
+       if (!(flags & __GFP_DIRECT_RECLAIM))
                return NULL;
 
        pthread_mutex_lock(&cachep->lock);
@@ -73,10 +73,17 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 
 void *kmalloc(size_t size, gfp_t gfp)
 {
-       void *ret = malloc(size);
+       void *ret;
+
+       if (!(gfp & __GFP_DIRECT_RECLAIM))
+               return NULL;
+
+       ret = malloc(size);
        uatomic_inc(&nr_allocated);
        if (kmalloc_verbose)
                printf("Allocating %p from malloc\n", ret);
+       if (gfp & __GFP_ZERO)
+               memset(ret, 0, size);
        return ret;
 }
 
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h
new file mode 100644 (file)
index 0000000..e69de29
index e9fff59..e3201cc 100644 (file)
@@ -11,6 +11,7 @@
 #define __GFP_IO               0x40u
 #define __GFP_FS               0x80u
 #define __GFP_NOWARN           0x200u
+#define __GFP_ZERO             0x8000u
 #define __GFP_ATOMIC           0x80000u
 #define __GFP_ACCOUNT          0x100000u
 #define __GFP_DIRECT_RECLAIM   0x400000u
index 979baee..a037def 100644 (file)
@@ -3,6 +3,7 @@
 #define SLAB_H
 
 #include <linux/types.h>
+#include <linux/gfp.h>
 
 #define SLAB_HWCACHE_ALIGN 1
 #define SLAB_PANIC 2
 void *kmalloc(size_t size, gfp_t);
 void kfree(void *);
 
+/* kmalloc() with __GFP_ZERO added to @gfp, requesting zeroed memory. */
+static inline void *kzalloc(size_t size, gfp_t gfp)
+{
+        return kmalloc(size, gfp | __GFP_ZERO);
+}
+
 void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
 void kmem_cache_free(struct kmem_cache *cachep, void *objp);
 
index 1a74922..f6304d2 100644 (file)
@@ -11,11 +11,11 @@ all:
                BUILD_TARGET=$(OUTPUT)/$$DIR;   \
                mkdir $$BUILD_TARGET  -p;       \
                make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-               #SUBDIR test prog name should be in the form: SUBDIR_test.sh
+               #SUBDIR test prog name should be in the form: SUBDIR_test.sh \
                TEST=$$DIR"_test.sh"; \
-               if [ -e $$DIR/$$TEST ]; then
-                       rsync -a $$DIR/$$TEST $$BUILD_TARGET/;
-               fi
+               if [ -e $$DIR/$$TEST ]; then \
+                       rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+               fi \
        done
 
 override define RUN_TESTS
index cea4adc..a63e845 100644 (file)
@@ -12,9 +12,9 @@ all:
                BUILD_TARGET=$(OUTPUT)/$$DIR;   \
                mkdir $$BUILD_TARGET  -p;       \
                make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-               if [ -e $$DIR/$(TEST_PROGS) ]; then
-                       rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/;
-               fi
+               if [ -e $$DIR/$(TEST_PROGS) ]; then \
+                       rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
+               fi \
        done
 
 override define RUN_TESTS
diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config
new file mode 100644 (file)
index 0000000..835c7f4
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_FUSE_FS=m
index 86636d2..183b468 100644 (file)
@@ -4,7 +4,7 @@ all:
 include ../lib.mk
 
 TEST_PROGS := mem-on-off-test.sh
-override RUN_TESTS := ./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
+override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
 override EMIT_TESTS := echo "$(RUN_TESTS)"
 
 run_full_test:
index 6a8e5a9..d148f9f 100644 (file)
@@ -2,3 +2,4 @@ CONFIG_MISC_FILESYSTEMS=y
 CONFIG_PSTORE=y
 CONFIG_PSTORE_PMSG=y
 CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=m
index b3c8ba3..d0121a8 100644 (file)
@@ -30,7 +30,7 @@ $(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
        $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
 
 $(OBJS): $(OUTPUT)/%.o: %.c
-       $(CC) -c $^ -o $@
+       $(CC) -c $^ -o $@ $(CFLAGS)
 
 $(TESTS): $(OUTPUT)/%.o: %.c
        $(CC) -c $^ -o $@
index 3d5a62f..f5d7a78 100644 (file)
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+include ../lib.mk
+
 ifndef CROSS_COMPILE
 CFLAGS := -std=gnu99
 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
@@ -6,16 +8,14 @@ ifeq ($(CONFIG_X86_32),y)
 LDLIBS += -lgcc_s
 endif
 
-TEST_PROGS := vdso_test vdso_standalone_test_x86
+TEST_PROGS := $(OUTPUT)/vdso_test $(OUTPUT)/vdso_standalone_test_x86
 
 all: $(TEST_PROGS)
-vdso_test: parse_vdso.c vdso_test.c
-vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
+$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
        $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
                vdso_standalone_test_x86.c parse_vdso.c \
-               -o vdso_standalone_test_x86
+               -o $@
 
-include ../lib.mk
-clean:
-       rm -fr $(TEST_PROGS)
+EXTRA_CLEAN := $(TEST_PROGS)
 endif
index 63c94d7..342c7bc 100644 (file)
@@ -11,3 +11,4 @@ mlock-intersect-test
 mlock-random-test
 virtual_address_range
 gup_benchmark
+va_128TBswitch
index 70268c0..70f4c30 100644 (file)
@@ -36,6 +36,8 @@ static struct timecounter *timecounter;
 static unsigned int host_vtimer_irq;
 static u32 host_vtimer_irq_flags;
 
+static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+
 static const struct kvm_irq_level default_ptimer_irq = {
        .irq    = 30,
        .level  = 1,
@@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void)
        return timecounter->cc->read(timecounter->cc);
 }
 
+/*
+ * True when the userspace_irqchip_in_use static key is enabled and
+ * irqchip_in_kernel() is false for @kvm. The static key is tested first.
+ */
+static inline bool userspace_irqchip(struct kvm *kvm)
+{
+       return static_branch_unlikely(&userspace_irqchip_in_use) &&
+               unlikely(!irqchip_in_kernel(kvm));
+}
+
 static void soft_timer_start(struct hrtimer *hrt, u64 ns)
 {
        hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
@@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
                cancel_work_sync(work);
 }
 
-static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
-{
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
-       /*
-        * When using a userspace irqchip with the architected timers, we must
-        * prevent continuously exiting from the guest, and therefore mask the
-        * physical interrupt by disabling it on the host interrupt controller
-        * when the virtual level is high, such that the guest can make
-        * forward progress.  Once we detect the output level being
-        * de-asserted, we unmask the interrupt again so that we exit from the
-        * guest when the timer fires.
-        */
-       if (vtimer->irq.level)
-               disable_percpu_irq(host_vtimer_irq);
-       else
-               enable_percpu_irq(host_vtimer_irq, 0);
-}
-
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
        if (kvm_timer_should_fire(vtimer))
                kvm_timer_update_irq(vcpu, true, vtimer);
 
-       if (static_branch_unlikely(&userspace_irqchip_in_use) &&
-           unlikely(!irqchip_in_kernel(vcpu->kvm)))
-               kvm_vtimer_update_mask_user(vcpu);
+       if (userspace_irqchip(vcpu->kvm) &&
+           !static_branch_unlikely(&has_gic_active_state))
+               disable_percpu_irq(host_vtimer_irq);
 
        return IRQ_HANDLED;
 }
@@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
        trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
                                   timer_ctx->irq.level);
 
-       if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
-           likely(irqchip_in_kernel(vcpu->kvm))) {
+       if (!userspace_irqchip(vcpu->kvm)) {
                ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
                                          timer_ctx->irq.irq,
                                          timer_ctx->irq.level,
@@ -350,12 +338,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
        phys_timer_emulate(vcpu);
 }
 
-static void __timer_snapshot_state(struct arch_timer_context *timer)
-{
-       timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-       timer->cnt_cval = read_sysreg_el0(cntv_cval);
-}
-
 static void vtimer_save_state(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -367,8 +349,10 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
        if (!vtimer->loaded)
                goto out;
 
-       if (timer->enabled)
-               __timer_snapshot_state(vtimer);
+       if (timer->enabled) {
+               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+               vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+       }
 
        /* Disable the virtual timer */
        write_sysreg_el0(0, cntv_ctl);
@@ -460,23 +444,43 @@ static void set_cntvoff(u64 cntvoff)
        kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
+static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
+{
+       int r;
+       r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
+       WARN_ON(r);
+}
+
+static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
        bool phys_active;
-       int ret;
 
-       phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-
-       ret = irq_set_irqchip_state(host_vtimer_irq,
-                                   IRQCHIP_STATE_ACTIVE,
-                                   phys_active);
-       WARN_ON(ret);
+       if (irqchip_in_kernel(vcpu->kvm))
+               phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+       else
+               phys_active = vtimer->irq.level;
+       set_vtimer_irq_phys_active(vcpu, phys_active);
 }
 
-static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
 {
-       kvm_vtimer_update_mask_user(vcpu);
+       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+       /*
+        * When using a userspace irqchip with the architected timers and a
+        * host interrupt controller that doesn't support an active state, we
+        * must still prevent continuously exiting from the guest, and
+        * therefore mask the physical interrupt by disabling it on the host
+        * interrupt controller when the virtual level is high, such that the
+        * guest can make forward progress.  Once we detect the output level
+        * being de-asserted, we unmask the interrupt again so that we exit
+        * from the guest when the timer fires.
+        */
+       if (vtimer->irq.level)
+               disable_percpu_irq(host_vtimer_irq);
+       else
+               enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 }
 
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
@@ -487,10 +491,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
        if (unlikely(!timer->enabled))
                return;
 
-       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
-               kvm_timer_vcpu_load_user(vcpu);
+       if (static_branch_likely(&has_gic_active_state))
+               kvm_timer_vcpu_load_gic(vcpu);
        else
-               kvm_timer_vcpu_load_vgic(vcpu);
+               kvm_timer_vcpu_load_nogic(vcpu);
 
        set_cntvoff(vtimer->cntvoff);
 
@@ -555,18 +559,24 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
-       if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
-               __timer_snapshot_state(vtimer);
-               if (!kvm_timer_should_fire(vtimer)) {
-                       kvm_timer_update_irq(vcpu, false, vtimer);
-                       kvm_vtimer_update_mask_user(vcpu);
-               }
+       if (!kvm_timer_should_fire(vtimer)) {
+               kvm_timer_update_irq(vcpu, false, vtimer);
+               if (static_branch_likely(&has_gic_active_state))
+                       set_vtimer_irq_phys_active(vcpu, false);
+               else
+                       enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
        }
 }
 
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-       unmask_vtimer_irq_user(vcpu);
+       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+       if (unlikely(!timer->enabled))
+               return;
+
+       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+               unmask_vtimer_irq_user(vcpu);
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -753,6 +763,8 @@ int kvm_timer_hyp_init(bool has_gic)
                        kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
                        goto out_free_irq;
                }
+
+               static_branch_enable(&has_gic_active_state);
        }
 
        kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
index 4501e65..65dea3f 100644 (file)
@@ -969,8 +969,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
                /* Check for overlaps */
                r = -EEXIST;
                kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
-                       if ((slot->id >= KVM_USER_MEM_SLOTS) ||
-                           (slot->id == id))
+                       if (slot->id == id)
                                continue;
                        if (!((base_gfn + npages <= slot->base_gfn) ||
                              (base_gfn >= slot->base_gfn + slot->npages)))