Merge tag 'kvm-riscv-6.6-1' of https://github.com/kvm-riscv/linux into HEAD
author	Paolo Bonzini <pbonzini@redhat.com>
Thu, 31 Aug 2023 17:25:55 +0000 (13:25 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>
Thu, 31 Aug 2023 17:25:55 +0000 (13:25 -0400)
KVM/riscv changes for 6.6

- Zba, Zbs, Zicntr, Zicsr, Zifencei, and Zihpm support for Guest/VM
- Added ONE_REG interface for SATP mode
- Added ONE_REG interface to enable/disable multiple ISA extensions
- Improved error codes returned by ONE_REG interfaces
- Added KVM_GET_REG_LIST ioctl() implementation for KVM RISC-V
- Added get-reg-list selftest for KVM RISC-V

864 files changed:
.mailmap
Documentation/ABI/testing/sysfs-class-led-trigger-netdev
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/ABI/testing/sysfs-platform-hidma
Documentation/ABI/testing/sysfs-platform-hidma-mgmt
Documentation/admin-guide/hw-vuln/gather_data_sampling.rst [new file with mode: 0644]
Documentation/admin-guide/hw-vuln/index.rst
Documentation/admin-guide/hw-vuln/srso.rst [new file with mode: 0644]
Documentation/admin-guide/kernel-parameters.txt
Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml
Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml
Documentation/i2c/writing-clients.rst
Documentation/networking/nf_conntrack-sysctl.rst
MAINTAINERS
Makefile
arch/alpha/include/asm/processor.h
arch/alpha/kernel/setup.c
arch/arm/boot/dts/arm/integratorap.dts
arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi
arch/arm/boot/dts/nxp/imx/imx6sx.dtsi
arch/arm/boot/dts/nxp/imx/imx7s.dtsi
arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
arch/arm/include/asm/arm_pmuv3.h
arch/arm/mach-zynq/pm.c
arch/arm64/boot/dts/freescale/imx8mm.dtsi
arch/arm64/boot/dts/freescale/imx8mn.dtsi
arch/arm64/boot/dts/freescale/imx93.dtsi
arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
arch/arm64/boot/dts/qcom/sa8775p-ride.dts
arch/arm64/boot/dts/qcom/sc7180.dtsi
arch/arm64/boot/dts/qcom/sc8180x.dtsi
arch/arm64/boot/dts/qcom/sm8150.dtsi
arch/arm64/boot/dts/qcom/sm8250.dtsi
arch/arm64/boot/dts/qcom/sm8350.dtsi
arch/arm64/boot/dts/rockchip/px30.dtsi
arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts
arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi
arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts
arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts
arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi
arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts
arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts
arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi
arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts
arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts
arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts
arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi
arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts
arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi
arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts
arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi
arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts
arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
arch/arm64/include/asm/el2_setup.h
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/kvm_nested.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/signal.c
arch/arm64/kvm/Kconfig
arch/arm64/kvm/arm.c
arch/arm64/kvm/emulate-nested.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/include/nvhe/mm.h
arch/arm64/kvm/hyp/nvhe/ffa.c
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/mm.c
arch/arm64/kvm/hyp/nvhe/setup.c
arch/arm64/kvm/hyp/nvhe/switch.c
arch/arm64/kvm/hyp/nvhe/tlb.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vhe/tlb.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/nested.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/pmu.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/trace_arm.h
arch/arm64/kvm/vgic/vgic.h
arch/arm64/tools/cpucaps
arch/arm64/tools/sysreg
arch/ia64/include/asm/processor.h
arch/loongarch/Kconfig
arch/loongarch/Makefile
arch/loongarch/include/asm/Kbuild
arch/loongarch/include/asm/fpu.h
arch/loongarch/include/asm/ptrace.h
arch/loongarch/include/asm/smp.h
arch/loongarch/kernel/fpu.S
arch/loongarch/kernel/hw_breakpoint.c
arch/loongarch/kernel/mcount.S
arch/loongarch/kernel/mcount_dyn.S
arch/loongarch/kernel/process.c
arch/loongarch/kernel/ptrace.c
arch/loongarch/kernel/smp.c
arch/loongarch/kernel/traps.c
arch/loongarch/lib/clear_user.S
arch/loongarch/lib/copy_user.S
arch/loongarch/lib/memcpy.S
arch/loongarch/lib/memmove.S
arch/loongarch/lib/memset.S
arch/loongarch/lib/unaligned.S
arch/loongarch/mm/page.S
arch/loongarch/mm/tlbex.S
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
arch/mips/kvm/mips.c
arch/mips/kvm/mmu.c
arch/parisc/Kconfig.debug
arch/parisc/boot/compressed/misc.c
arch/parisc/include/asm/dma.h
arch/parisc/include/asm/ftrace.h
arch/parisc/include/asm/spinlock.h
arch/parisc/include/asm/spinlock_types.h
arch/parisc/kernel/entry.S
arch/parisc/kernel/firmware.c
arch/parisc/kernel/ftrace.c
arch/parisc/kernel/parisc_ksyms.c
arch/parisc/kernel/pci-dma.c
arch/parisc/kernel/pdt.c
arch/parisc/kernel/perf.c
arch/parisc/kernel/processor.c
arch/parisc/kernel/setup.c
arch/parisc/kernel/signal.c
arch/parisc/kernel/sys_parisc.c
arch/parisc/kernel/syscall.S
arch/parisc/kernel/unaligned.c
arch/parisc/lib/ucmpdi2.c
arch/parisc/mm/fault.c
arch/parisc/mm/init.c
arch/parisc/mm/ioremap.c
arch/powerpc/include/asm/processor.h
arch/powerpc/kernel/rtas_flash.c
arch/powerpc/mm/book3s64/subpage_prot.c
arch/riscv/Kconfig
arch/riscv/include/asm/cacheflush.h
arch/riscv/include/asm/insn.h
arch/riscv/include/asm/mmio.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/vector.h
arch/riscv/include/asm/vmalloc.h
arch/riscv/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
arch/riscv/include/uapi/asm/ptrace.h
arch/riscv/kernel/compat_vdso/Makefile
arch/riscv/kernel/cpu.c
arch/riscv/kernel/elf_kexec.c
arch/riscv/kernel/irq.c
arch/riscv/kernel/ptrace.c
arch/riscv/kernel/smp.c
arch/riscv/kernel/traps.c
arch/riscv/kvm/mmu.c
arch/riscv/lib/uaccess.S
arch/riscv/mm/init.c
arch/riscv/mm/kasan_init.c
arch/riscv/mm/pageattr.c
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/uv.h
arch/s390/include/uapi/asm/kvm.h
arch/s390/kernel/uv.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/pv.c
arch/s390/mm/fault.c
arch/s390/mm/gmap.c
arch/sparc/include/asm/processor_64.h
arch/x86/Kconfig
arch/x86/boot/compressed/idt_64.c
arch/x86/boot/compressed/sev.c
arch/x86/entry/vdso/vma.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/entry-common.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/linkage.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/segment.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/alternative.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cpu.h
arch/x86/kernel/fpu/context.h
arch/x86/kernel/fpu/core.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/kprobes/opt.c
arch/x86/kernel/static_call.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/cpuid.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/svm/vmenter.S
arch/x86/kvm/x86.c
arch/x86/lib/retpoline.S
block/blk-cgroup.c
block/blk-core.c
block/blk-crypto-fallback.c
block/blk-iocost.c
block/blk-mq.c
block/elevator.c
block/fops.c
crypto/af_alg.c
drivers/accel/ivpu/ivpu_gem.c
drivers/accel/qaic/qaic_control.c
drivers/accel/qaic/qaic_data.c
drivers/acpi/resource.c
drivers/acpi/scan.c
drivers/android/binder.c
drivers/android/binder_alloc.c
drivers/android/binder_alloc.h
drivers/base/cpu.c
drivers/block/rnbd/rnbd-clt-sysfs.c
drivers/block/zram/zram_drv.c
drivers/bus/ti-sysc.c
drivers/char/tpm/tpm-chip.c
drivers/char/tpm/tpm_crb.c
drivers/char/tpm/tpm_tis.c
drivers/clk/clk-devres.c
drivers/clk/keystone/syscon-clk.c
drivers/counter/Kconfig
drivers/cpufreq/amd-pstate.c
drivers/cpuidle/cpuidle-psci-domain.c
drivers/cpuidle/dt_idle_genpd.c
drivers/cpuidle/dt_idle_genpd.h
drivers/crypto/caam/ctrl.c
drivers/dma-buf/sw_sync.c
drivers/dma/Kconfig
drivers/dma/idxd/device.c
drivers/dma/mcf-edma.c
drivers/dma/owl-dma.c
drivers/dma/pl330.c
drivers/dma/xilinx/xdma.c
drivers/gpio/gpio-sim.c
drivers/gpio/gpio-ws16c48.c
drivers/gpio/gpiolib-sysfs.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
drivers/gpu/drm/bridge/ite-it6505.c
drivers/gpu/drm/bridge/lontium-lt9611.c
drivers/gpu/drm/bridge/samsung-dsim.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_gem_shmem_helper.c
drivers/gpu/drm/drm_probe_helper.c
drivers/gpu/drm/i915/display/intel_display_device.c
drivers/gpu/drm/i915/display/intel_hotplug.c
drivers/gpu/drm/i915/display/intel_sdvo.c
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
drivers/gpu/drm/i915/gt/uc/intel_huc.c
drivers/gpu/drm/i915/i915_driver.c
drivers/gpu/drm/nouveau/nouveau_connector.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
drivers/gpu/drm/panel/panel-simple.c
drivers/gpu/drm/panfrost/panfrost_devfreq.c
drivers/gpu/drm/qxl/qxl_drv.h
drivers/gpu/drm/qxl/qxl_dumb.c
drivers/gpu/drm/qxl/qxl_gem.c
drivers/gpu/drm/qxl/qxl_ioctl.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
drivers/hwmon/aquacomputer_d5next.c
drivers/hwmon/pmbus/bel-pfe.c
drivers/i2c/busses/i2c-bcm-iproc.c
drivers/i2c/busses/i2c-designware-master.c
drivers/i2c/busses/i2c-hisi.c
drivers/i2c/busses/i2c-imx-lpi2c.c
drivers/i2c/busses/i2c-sun6i-p2wi.c
drivers/i2c/busses/i2c-tegra.c
drivers/iio/adc/ad7192.c
drivers/iio/adc/ina2xx-adc.c
drivers/iio/adc/meson_saradc.c
drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
drivers/iio/frequency/admv1013.c
drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
drivers/iio/industrialio-core.c
drivers/iio/light/rohm-bu27008.c
drivers/iio/light/rohm-bu27034.c
drivers/infiniband/core/umem.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_res.c
drivers/infiniband/hw/hfi1/chip.c
drivers/interconnect/qcom/bcm-voter.c
drivers/interconnect/qcom/icc-rpmh.h
drivers/interconnect/qcom/sa8775p.c
drivers/interconnect/qcom/sm8450.c
drivers/interconnect/qcom/sm8550.c
drivers/isdn/mISDN/dsp.h
drivers/isdn/mISDN/dsp_cmx.c
drivers/isdn/mISDN/dsp_core.c
drivers/leds/trigger/ledtrig-netdev.c
drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c
drivers/media/platform/nxp/imx7-media-csi.c
drivers/media/platform/qcom/venus/hfi_cmds.c
drivers/media/usb/uvc/uvc_v4l2.c
drivers/misc/cardreader/rts5227.c
drivers/misc/cardreader/rts5228.c
drivers/misc/cardreader/rts5249.c
drivers/misc/cardreader/rts5260.c
drivers/misc/cardreader/rts5261.c
drivers/misc/cardreader/rtsx_pcr.c
drivers/misc/tps6594-esm.c
drivers/mmc/core/block.c
drivers/mmc/host/moxart-mmc.c
drivers/mmc/host/sdhci_f_sdh30.c
drivers/mmc/host/sunplus-mmc.c
drivers/mmc/host/wbsd.c
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/can/vxcan.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mt7530.h
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/ocelot/felix.c
drivers/net/dsa/ocelot/felix_vsc9959.c
drivers/net/ethernet/broadcom/b44.c
drivers/net/ethernet/broadcom/bgmac.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
drivers/net/ethernet/ibm/ibmveth.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_nvm.c
drivers/net/ethernet/intel/iavf/iavf_ethtool.c
drivers/net/ethernet/intel/iavf/iavf_fdir.c
drivers/net/ethernet/intel/iavf/iavf_fdir.h
drivers/net/ethernet/intel/ice/ice_base.c
drivers/net/ethernet/intel/ice/ice_eswitch.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_sriov.c
drivers/net/ethernet/intel/ice/ice_vf_lib.c
drivers/net/ethernet/intel/ice/ice_vf_lib.h
drivers/net/ethernet/intel/ice/ice_virtchnl.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/igc/igc.h
drivers/net/ethernet/intel/igc/igc_defines.h
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
drivers/net/ethernet/marvell/octeon_ep/octep_main.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/prestera/prestera_router.c
drivers/net/ethernet/mediatek/mtk_wed.c
drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/sriov.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
drivers/net/ethernet/microsoft/mana/mana_en.c
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/sfc/ef100_nic.c
drivers/net/ethernet/sfc/falcon/selftest.c
drivers/net/ethernet/sfc/selftest.c
drivers/net/ethernet/sfc/siena/selftest.c
drivers/net/ethernet/sfc/tc.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/macsec.c
drivers/net/mdio/mdio-bitbang.c
drivers/net/pcs/pcs-rzn1-miic.c
drivers/net/phy/at803x.c
drivers/net/phy/broadcom.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/sfp-bus.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/vxlan/vxlan_vnifilter.c
drivers/net/wireguard/allowedips.c
drivers/net/wireguard/selftest/allowedips.c
drivers/net/wireless/ath/ath12k/wmi.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/intel/iwlwifi/Kconfig
drivers/net/wireless/realtek/rtw89/mac.c
drivers/net/xen-netback/netback.c
drivers/nvme/host/core.c
drivers/nvme/host/ioctl.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/of/dynamic.c
drivers/of/kexec.c
drivers/of/platform.c
drivers/of/unittest.c
drivers/parisc/sba_iommu.c
drivers/pci/bus.c
drivers/pci/controller/Kconfig
drivers/pci/controller/dwc/pcie-designware-host.c
drivers/pci/controller/dwc/pcie-designware.c
drivers/pci/controller/dwc/pcie-designware.h
drivers/pci/hotplug/acpiphp_glue.c
drivers/pci/of.c
drivers/perf/arm_pmuv3.c
drivers/pinctrl/pinctrl-amd.c
drivers/pinctrl/qcom/pinctrl-msm.c
drivers/pinctrl/qcom/pinctrl-msm.h
drivers/pinctrl/qcom/pinctrl-sa8775p.c
drivers/pinctrl/renesas/pinctrl-rza2.c
drivers/pinctrl/renesas/pinctrl-rzg2l.c
drivers/pinctrl/renesas/pinctrl-rzv2m.c
drivers/platform/mellanox/mlxbf-tmfifo.c
drivers/platform/x86/amd/pmf/sps.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/intel/speed_select_if/isst_if_common.c
drivers/platform/x86/lenovo-ymc.c
drivers/platform/x86/mlx-platform.c
drivers/platform/x86/msi-ec.c
drivers/platform/x86/serial-multi-instantiate.c
drivers/regulator/da9063-regulator.c
drivers/regulator/qcom-rpmh-regulator.c
drivers/s390/crypto/vfio_ap_ops.c
drivers/s390/crypto/vfio_ap_private.h
drivers/scsi/53c700.c
drivers/scsi/fnic/fnic.h
drivers/scsi/fnic/fnic_scsi.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/qedf/qedf_main.c
drivers/scsi/qedi/qedi_main.c
drivers/scsi/raid_class.c
drivers/scsi/scsi_proc.c
drivers/scsi/snic/snic_disc.c
drivers/scsi/storvsc_drv.c
drivers/soc/aspeed/aspeed-socinfo.c
drivers/soc/aspeed/aspeed-uart-routing.c
drivers/spi/spi-cadence.c
drivers/spi/spi-stm32.c
drivers/thunderbolt/tb.c
drivers/thunderbolt/tmu.c
drivers/tty/Kconfig
drivers/tty/n_gsm.c
drivers/tty/serial/8250/8250_core.c
drivers/tty/serial/8250/8250_port.c
drivers/tty/serial/fsl_lpuart.c
drivers/tty/serial/serial_base.h
drivers/tty/serial/serial_base_bus.c
drivers/ufs/core/ufs-mcq.c
drivers/ufs/host/ufs-qcom.c
drivers/ufs/host/ufs-renesas.c
drivers/usb/common/usb-conn-gpio.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/udc/core.c
drivers/usb/storage/alauda.c
drivers/usb/typec/altmodes/displayport.c
drivers/usb/typec/mux/Kconfig
drivers/usb/typec/mux/nb7vpq904m.c
drivers/usb/typec/tcpm/tcpm.c
drivers/vdpa/mlx5/core/mlx5_vdpa.h
drivers/vdpa/mlx5/core/mr.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/pds/Makefile
drivers/vdpa/pds/debugfs.c
drivers/vdpa/pds/vdpa_dev.c
drivers/vdpa/pds/vdpa_dev.h
drivers/vdpa/vdpa.c
drivers/vdpa/vdpa_user/vduse_dev.c
drivers/vhost/scsi.c
drivers/video/fbdev/amifb.c
drivers/video/fbdev/atmel_lcdfb.c
drivers/video/fbdev/goldfishfb.c
drivers/video/fbdev/mmp/hw/mmp_ctrl.c
drivers/video/fbdev/ssd1307fb.c
drivers/virtio/virtio_mem.c
drivers/virtio/virtio_mmio.c
drivers/virtio/virtio_pci_common.c
drivers/virtio/virtio_pci_legacy.c
drivers/virtio/virtio_vdpa.c
fs/btrfs/block-group.c
fs/btrfs/block-group.h
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-inode.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_map.c
fs/btrfs/inode.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/tree-checker.c
fs/btrfs/volumes.c
fs/gfs2/file.c
fs/gfs2/trans.c
fs/inode.c
fs/nfs/direct.c
fs/nfs/nfs42proc.c
fs/nfs/nfs4proc.c
fs/nfs/sysfs.c
fs/nfsd/nfs4state.c
fs/nfsd/nfsctl.c
fs/nilfs2/inode.c
fs/nilfs2/segment.c
fs/nilfs2/the_nilfs.h
fs/proc/kcore.c
fs/proc/task_mmu.c
fs/smb/client/cifs_debug.c
fs/smb/client/file.c
fs/smb/client/fs_context.c
fs/smb/server/smb2misc.c
fs/smb/server/smb2pdu.c
fs/vboxsf/shfl_hostintf.h
fs/zonefs/file.c
fs/zonefs/super.c
fs/zonefs/zonefs.h
include/drm/display/drm_dp.h
include/drm/drm_edid.h
include/drm/drm_probe_helper.h
include/kvm/arm_pmu.h
include/linux/bio.h
include/linux/blkdev.h
include/linux/clk.h
include/linux/cpu.h
include/linux/huge_mm.h
include/linux/kvm_host.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/pagewalk.h
include/linux/prefetch.h
include/linux/raid_class.h
include/linux/serial_core.h
include/linux/skmsg.h
include/linux/tpm.h
include/linux/trace_events.h
include/linux/virtio_net.h
include/net/bonding.h
include/net/cfg80211.h
include/net/inet_sock.h
include/net/ip.h
include/net/mac80211.h
include/net/netfilter/nf_tables.h
include/net/rtnetlink.h
include/net/sock.h
include/net/xfrm.h
include/trace/events/tcp.h
include/uapi/linux/elf.h
include/video/kyro.h
io_uring/io_uring.c
io_uring/openclose.c
kernel/irq/resend.c
kernel/power/hibernate.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_events_synth.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_sched_wakeup.c
kernel/workqueue.c
lib/Kconfig.debug
lib/clz_ctz.c
lib/maple_tree.c
lib/radix-tree.c
lib/scatterlist.c
mm/compaction.c
mm/damon/core.c
mm/damon/vaddr.c
mm/gup.c
mm/hmm.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/ksm.c
mm/madvise.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/mempolicy.c
mm/migrate_device.c
mm/mincore.c
mm/mlock.c
mm/mprotect.c
mm/pagewalk.c
mm/shmem.c
mm/swapfile.c
mm/vmalloc.c
mm/vmscan.c
mm/zsmalloc.c
net/batman-adv/bat_v_elp.c
net/batman-adv/bat_v_ogm.c
net/batman-adv/hard-interface.c
net/batman-adv/netlink.c
net/batman-adv/soft-interface.c
net/batman-adv/translation-table.c
net/batman-adv/types.h
net/can/isotp.c
net/can/raw.c
net/core/filter.c
net/core/rtnetlink.c
net/core/skmsg.c
net/core/sock.c
net/core/sock_map.c
net/dccp/ipv4.c
net/dccp/output.c
net/dccp/proto.c
net/devlink/leftover.c
net/ipv4/af_inet.c
net/ipv4/datagram.c
net/ipv4/ip_tunnel_core.c
net/ipv4/ip_vti.c
net/ipv4/nexthop.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_timer.c
net/ipv6/Kconfig
net/ipv6/ip6_vti.c
net/ipv6/ndisc.c
net/key/af_key.c
net/mac80211/rx.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_dynset.c
net/netfilter/nft_set_hash.c
net/netfilter/nft_set_pipapo.c
net/netfilter/nft_set_rbtree.c
net/openvswitch/datapath.c
net/packet/af_packet.c
net/sched/sch_api.c
net/sctp/socket.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_clc.c
net/smc/smc_core.c
net/smc/smc_sysctl.c
net/sunrpc/svcsock.c
net/sunrpc/xprtrdma/verbs.c
net/tls/tls_device.c
net/tls/tls_main.c
net/wireless/nl80211.c
net/xdp/xsk.c
net/xfrm/xfrm_compat.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_interface_core.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
rust/macros/vtable.rs
samples/ftrace/ftrace-direct-modify.c
samples/ftrace/ftrace-direct-multi-modify.c
samples/ftrace/ftrace-direct-multi.c
samples/ftrace/ftrace-direct-too.c
samples/ftrace/ftrace-direct.c
scripts/kallsyms.c
security/keys/sysctl.c
security/selinux/ss/policydb.c
sound/pci/hda/patch_cs8409-tables.c
sound/pci/hda/patch_realtek.c
sound/pci/ymfpci/ymfpci.c
sound/soc/amd/yc/acp6x-mach.c
sound/soc/codecs/cs35l41.c
sound/soc/codecs/cs35l56-i2c.c
sound/soc/codecs/cs35l56-spi.c
sound/soc/codecs/cs35l56.c
sound/soc/codecs/max98363.c
sound/soc/codecs/rt1308-sdw.c
sound/soc/codecs/rt5665.c
sound/soc/codecs/tas2781-comlib.c
sound/soc/fsl/fsl_micfil.c
sound/soc/fsl/fsl_micfil.h
sound/soc/intel/boards/sof_sdw.c
sound/soc/intel/boards/sof_sdw_cs42l42.c
sound/soc/meson/axg-tdm-formatter.c
sound/soc/soc-pcm.c
sound/soc/sof/intel/hda-dai-ops.c
sound/soc/sof/intel/hda-dai.c
sound/soc/sof/intel/hda.h
sound/soc/sof/ipc3.c
sound/soc/sof/ipc4-pcm.c
sound/soc/sof/ipc4-topology.c
sound/usb/quirks-table.h
tools/arch/arm64/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
tools/arch/riscv/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/msr-index.h
tools/counter/Makefile
tools/objtool/arch/x86/decode.c
tools/objtool/check.c
tools/objtool/include/objtool/arch.h
tools/objtool/include/objtool/elf.h
tools/perf/util/machine.c
tools/perf/util/stat-display.c
tools/perf/util/thread-stack.c
tools/testing/radix-tree/regression1.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/progs/test_sockmap_listen.c
tools/testing/selftests/cachestat/test_cachestat.c
tools/testing/selftests/cgroup/test_kmem.c
tools/testing/selftests/drivers/net/bonding/Makefile
tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/bonding/bond_options.sh
tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc [new file with mode: 0644]
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
tools/testing/selftests/kvm/aarch64/arch_timer.c
tools/testing/selftests/kvm/aarch64/debug-exceptions.c
tools/testing/selftests/kvm/aarch64/hypercalls.c
tools/testing/selftests/kvm/aarch64/page_fault_test.c
tools/testing/selftests/kvm/aarch64/vgic_irq.c
tools/testing/selftests/kvm/guest_print_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/include/aarch64/arch_timer.h
tools/testing/selftests/kvm/include/aarch64/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/riscv/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390x/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/test_util.h
tools/testing/selftests/kvm/include/ucall_common.h
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/include/x86_64/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/kvm_page_table_test.c
tools/testing/selftests/kvm/lib/aarch64/ucall.c
tools/testing/selftests/kvm/lib/guest_sprintf.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/riscv/ucall.c
tools/testing/selftests/kvm/lib/s390x/ucall.c
tools/testing/selftests/kvm/lib/sparsebit.c
tools/testing/selftests/kvm/lib/string_override.c
tools/testing/selftests/kvm/lib/ucall_common.c
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/lib/x86_64/ucall.c
tools/testing/selftests/kvm/max_guest_memory_test.c
tools/testing/selftests/kvm/memslot_perf_test.c
tools/testing/selftests/kvm/s390x/cmma_test.c
tools/testing/selftests/kvm/s390x/debug_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390x/memop.c
tools/testing/selftests/kvm/s390x/tprot.c
tools/testing/selftests/kvm/set_memory_region_test.c
tools/testing/selftests/kvm/steal_time.c
tools/testing/selftests/kvm/x86_64/cpuid_test.c
tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
tools/testing/selftests/kvm/x86_64/hyperv_features.c
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
tools/testing/selftests/kvm/x86_64/sync_regs_test.c
tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
tools/testing/selftests/kvm/x86_64/userspace_io_test.c
tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
tools/testing/selftests/kvm/x86_64/xapic_state_test.c
tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
tools/testing/selftests/mm/hmm-tests.c
tools/testing/selftests/mm/ksm_tests.c
tools/testing/selftests/net/.gitignore
tools/testing/selftests/net/fib_nexthops.sh
tools/testing/selftests/net/forwarding/bridge_mdb.sh
tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
tools/testing/selftests/net/forwarding/ethtool.sh
tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
tools/testing/selftests/net/forwarding/ethtool_mm.sh
tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
tools/testing/selftests/net/forwarding/lib.sh
tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
tools/testing/selftests/net/forwarding/settings [new file with mode: 0644]
tools/testing/selftests/net/forwarding/tc_actions.sh
tools/testing/selftests/net/forwarding/tc_flower.sh
tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/pmtu.sh
tools/testing/selftests/rseq/Makefile
tools/testing/selftests/rseq/rseq.c
virt/kvm/Kconfig
virt/kvm/kvm_main.c

index 5dd3181..e506625 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -538,6 +538,8 @@ Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
 Sibi Sankar <quic_sibis@quicinc.com> <sibis@codeaurora.org>
 Sid Manning <quic_sidneym@quicinc.com> <sidneym@codeaurora.org>
 Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
+Simon Horman <horms@kernel.org> <simon.horman@corigine.com>
+Simon Horman <horms@kernel.org> <simon.horman@netronome.com>
 Simon Kelley <simon@thekelleys.org.uk>
 Sricharan Ramabadhran <quic_srichara@quicinc.com> <sricharan@codeaurora.org>
 Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
index 78b62a2..f6d9d72 100644 (file)
@@ -13,7 +13,7 @@ Description:
                Specifies the duration of the LED blink in milliseconds.
                Defaults to 50 ms.
 
-               With hw_control ON, the interval value MUST be set to the
+               When offloaded is true, the interval value MUST be set to the
                default value and cannot be changed.
                Trying to set any value in this specific mode will return
                an EINVAL error.
@@ -44,8 +44,8 @@ Description:
                If set to 1, the LED will blink for the milliseconds specified
                in interval to signal transmission.
 
-               With hw_control ON, the blink interval is controlled by hardware
-               and won't reflect the value set in interval.
+               When offloaded is true, the blink interval is controlled by
+               hardware and won't reflect the value set in interval.
 
 What:          /sys/class/leds/<led>/rx
 Date:          Dec 2017
@@ -59,21 +59,21 @@ Description:
                If set to 1, the LED will blink for the milliseconds specified
                in interval to signal reception.
 
-               With hw_control ON, the blink interval is controlled by hardware
-               and won't reflect the value set in interval.
+               When offloaded is true, the blink interval is controlled by
+               hardware and won't reflect the value set in interval.
 
-What:          /sys/class/leds/<led>/hw_control
+What:          /sys/class/leds/<led>/offloaded
 Date:          Jun 2023
 KernelVersion: 6.5
 Contact:       linux-leds@vger.kernel.org
 Description:
-               Communicate whether the LED trigger modes are driven by hardware
-               or software fallback is used.
+               Communicate whether the LED trigger modes are offloaded to
+               hardware or whether software fallback is used.
 
                If 0, the LED is using software fallback to blink.
 
-               If 1, the LED is using hardware control to blink and signal the
-               requested modes.
+               If 1, the LED blinking in requested mode is offloaded to
+               hardware.
 
 What:          /sys/class/leds/<led>/link_10
 Date:          Jun 2023
index ecd585c..77942ee 100644 (file)
@@ -513,17 +513,18 @@ Description:      information about CPUs heterogeneity.
                cpu_capacity: capacity of cpuX.
 
 What:          /sys/devices/system/cpu/vulnerabilities
+               /sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+               /sys/devices/system/cpu/vulnerabilities/itlb_multihit
+               /sys/devices/system/cpu/vulnerabilities/l1tf
+               /sys/devices/system/cpu/vulnerabilities/mds
                /sys/devices/system/cpu/vulnerabilities/meltdown
+               /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+               /sys/devices/system/cpu/vulnerabilities/retbleed
+               /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
                /sys/devices/system/cpu/vulnerabilities/spectre_v1
                /sys/devices/system/cpu/vulnerabilities/spectre_v2
-               /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
-               /sys/devices/system/cpu/vulnerabilities/l1tf
-               /sys/devices/system/cpu/vulnerabilities/mds
                /sys/devices/system/cpu/vulnerabilities/srbds
                /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
-               /sys/devices/system/cpu/vulnerabilities/itlb_multihit
-               /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
-               /sys/devices/system/cpu/vulnerabilities/retbleed
 Date:          January 2018
 Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Information about CPU vulnerabilities
index fca40a5..a80aeda 100644 (file)
@@ -2,7 +2,7 @@ What:           /sys/devices/platform/hidma-*/chid
                /sys/devices/platform/QCOM8061:*/chid
 Date:          Dec 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains the ID of the channel within the HIDMA instance.
                It is used to associate a given HIDMA channel with the
index 3b6c5c9..0373745 100644 (file)
@@ -2,7 +2,7 @@ What:           /sys/devices/platform/hidma-mgmt*/chanops/chan*/priority
                /sys/devices/platform/QCOM8060:*/chanops/chan*/priority
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains either 0 or 1 and indicates if the DMA channel is a
                low priority (0) or high priority (1) channel.
@@ -11,7 +11,7 @@ What:         /sys/devices/platform/hidma-mgmt*/chanops/chan*/weight
                /sys/devices/platform/QCOM8060:*/chanops/chan*/weight
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains 0..15 and indicates the weight of the channel among
                equal priority channels during round robin scheduling.
@@ -20,7 +20,7 @@ What:         /sys/devices/platform/hidma-mgmt*/chreset_timeout_cycles
                /sys/devices/platform/QCOM8060:*/chreset_timeout_cycles
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains the platform specific cycle value to wait after a
                reset command is issued. If the value is chosen too short,
@@ -32,7 +32,7 @@ What:         /sys/devices/platform/hidma-mgmt*/dma_channels
                /sys/devices/platform/QCOM8060:*/dma_channels
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains the number of dma channels supported by one instance
                of HIDMA hardware. The value may change from chip to chip.
@@ -41,7 +41,7 @@ What:         /sys/devices/platform/hidma-mgmt*/hw_version_major
                /sys/devices/platform/QCOM8060:*/hw_version_major
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Version number major for the hardware.
 
@@ -49,7 +49,7 @@ What:         /sys/devices/platform/hidma-mgmt*/hw_version_minor
                /sys/devices/platform/QCOM8060:*/hw_version_minor
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Version number minor for the hardware.
 
@@ -57,7 +57,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_rd_xactions
                /sys/devices/platform/QCOM8060:*/max_rd_xactions
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains a value between 0 and 31. Maximum number of
                read transactions that can be issued back to back.
@@ -69,7 +69,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_read_request
                /sys/devices/platform/QCOM8060:*/max_read_request
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Size of each read request. The value needs to be a power
                of two and can be between 128 and 1024.
@@ -78,7 +78,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_wr_xactions
                /sys/devices/platform/QCOM8060:*/max_wr_xactions
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains a value between 0 and 31. Maximum number of
                write transactions that can be issued back to back.
@@ -91,7 +91,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_write_request
                /sys/devices/platform/QCOM8060:*/max_write_request
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Size of each write request. The value needs to be a power
                of two and can be between 128 and 1024.
diff --git a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst
new file mode 100644 (file)
index 0000000..264bfa9
--- /dev/null
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+GDS - Gather Data Sampling
+==========================
+
+Gather Data Sampling is a hardware vulnerability which allows unprivileged
+speculative access to data which was previously stored in vector registers.
+
+Problem
+-------
+When a gather instruction performs loads from memory, different data elements
+are merged into the destination vector register. However, when a gather
+instruction that is transiently executed encounters a fault, stale data from
+architectural or internal vector registers may get transiently forwarded to the
+destination vector register instead. This will allow a malicious attacker to
+infer stale data using typical side channel techniques like cache timing
+attacks. GDS is a purely sampling-based attack.
+
+The attacker uses gather instructions to infer the stale vector register data.
+The victim does not need to do anything special other than use the vector
+registers. The victim does not need to use gather instructions to be
+vulnerable.
+
+Because the buffers are shared between Hyper-Threads, cross Hyper-Thread attacks
+are possible.
+
+Attack scenarios
+----------------
+Without mitigation, GDS can infer stale data across virtually all
+permission boundaries:
+
+       Non-enclaves can infer SGX enclave data
+       Userspace can infer kernel data
+       Guests can infer data from hosts
+	Guests can infer data from other guests
+       Users can infer data from other users
+
+Because of this, it is important to ensure that the mitigation stays enabled in
+lower-privilege contexts like guests and when running outside SGX enclaves.
+
+The hardware enforces the mitigation for SGX. Likewise, VMMs should ensure
+that guests are not allowed to disable the GDS mitigation. If a host erred and
+allowed this, a guest could theoretically disable GDS mitigation, mount an
+attack, and re-enable it.
+
+Mitigation mechanism
+--------------------
+This issue is mitigated in microcode. The microcode defines the following new
+bits:
+
+ ================================   ===   ============================
+ IA32_ARCH_CAPABILITIES[GDS_CTRL]   R/O   Enumerates GDS vulnerability
+                                          and mitigation support.
+ IA32_ARCH_CAPABILITIES[GDS_NO]     R/O   Processor is not vulnerable.
+ IA32_MCU_OPT_CTRL[GDS_MITG_DIS]    R/W   Disables the mitigation
+                                          0 by default.
+ IA32_MCU_OPT_CTRL[GDS_MITG_LOCK]   R/W   Locks GDS_MITG_DIS=0. Writes
+                                          to GDS_MITG_DIS are ignored
+                                          Can't be cleared once set.
+ ================================   ===   ============================
+
+GDS can also be mitigated on systems that don't have updated microcode by
+disabling AVX. This can be done by setting gather_data_sampling="force" or
+"clearcpuid=avx" on the kernel command-line.
+
+If used, these options will disable AVX use by turning off XSAVE YMM support.
+However, the processor will still enumerate AVX support.  Userspace that
+does not follow proper AVX enumeration to check both AVX *and* XSAVE YMM
+support will break.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The mitigation can be disabled by setting "gather_data_sampling=off" or
+"mitigations=off" on the kernel command line. Not specifying either will default
+to the mitigation being enabled. Specifying "gather_data_sampling=force" will
+use the microcode mitigation when available or disable AVX on affected systems
+where the microcode hasn't been updated to include the mitigation.
+
+GDS System Information
+------------------------
+The kernel provides vulnerability status information through sysfs. For
+GDS this can be accessed by the following sysfs file:
+
+/sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+
+The possible values contained in this file are:
+
+ ============================== =============================================
+ Not affected                   Processor not vulnerable.
+ Vulnerable                     Processor vulnerable and mitigation disabled.
+ Vulnerable: No microcode       Processor vulnerable and microcode is missing
+                                mitigation.
+ Mitigation: AVX disabled,
+ no microcode                   Processor is vulnerable and microcode is missing
+                                mitigation. AVX disabled as mitigation.
+ Mitigation: Microcode          Processor is vulnerable and mitigation is in
+                                effect.
+ Mitigation: Microcode (locked) Processor is vulnerable and mitigation is in
+                                effect and cannot be disabled.
+ Unknown: Dependent on
+ hypervisor status              Running on a virtual guest processor that is
+                                affected but with no way to know if host
+                                processor is mitigated or vulnerable.
+ ============================== =============================================
+
+GDS Default mitigation
+----------------------
+The updated microcode will enable the mitigation by default. The kernel's
+default action is to leave the mitigation enabled.
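[Editor's aside, not part of the patch above: the GDS status described in "GDS System Information" is a one-line read of the new sysfs file. A minimal userspace sketch in C, assuming only that the file exists on a kernel carrying this change:]

/*
 * Illustrative sketch: print the GDS mitigation status exposed by the
 * sysfs file documented above. Not part of this patch.
 */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/devices/system/cpu/vulnerabilities/gather_data_sampling";
	char buf[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);	/* absent on kernels without this change */
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("GDS: %s", buf);	/* e.g. "Mitigation: Microcode" */
	fclose(f);
	return 0;
}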
index e061476..de99caa 100644 (file)
@@ -13,9 +13,11 @@ are configurable at compile, boot or run time.
    l1tf
    mds
    tsx_async_abort
-   multihit.rst
-   special-register-buffer-data-sampling.rst
-   core-scheduling.rst
-   l1d_flush.rst
-   processor_mmio_stale_data.rst
-   cross-thread-rsb.rst
+   multihit
+   special-register-buffer-data-sampling
+   core-scheduling
+   l1d_flush
+   processor_mmio_stale_data
+   cross-thread-rsb
+   srso
+   gather_data_sampling
diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst
new file mode 100644 (file)
index 0000000..b6cfb51
--- /dev/null
@@ -0,0 +1,150 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Speculative Return Stack Overflow (SRSO)
+========================================
+
+This is a mitigation for the speculative return stack overflow (SRSO)
+vulnerability found on AMD processors. The mechanism is by now the well
+known scenario of poisoning CPU functional units - the Branch Target
+Buffer (BTB) and Return Address Predictor (RAP) in this case - and then
+tricking the elevated privilege domain (the kernel) into leaking
+sensitive data.
+
+AMD CPUs predict RET instructions using a Return Address Predictor (aka
+Return Address Stack/Return Stack Buffer). In some cases, a non-architectural
+CALL instruction (i.e., an instruction predicted to be a CALL but is
+not actually a CALL) can create an entry in the RAP which may be used
+to predict the target of a subsequent RET instruction.
+
+The specific circumstances that lead to this vary by microarchitecture
+but the concern is that an attacker can mis-train the CPU BTB to predict
+non-architectural CALL instructions in kernel space and use this to
+control the speculative target of a subsequent kernel RET, potentially
+leading to information disclosure via a speculative side-channel.
+
+The issue is tracked under CVE-2023-20569.
+
+Affected processors
+-------------------
+
+AMD Zen, generations 1-4. That is, all families 0x17 and 0x19. Older
+processors have not been investigated.
+
+System information and options
+------------------------------
+
+First of all, it is required that the latest microcode be loaded for
+mitigations to be effective.
+
+The sysfs file showing SRSO mitigation status is:
+
+  /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow
+
+The possible values in this file are:
+
+ * 'Not affected':
+
+   The processor is not vulnerable
+
+ * 'Vulnerable: no microcode':
+
+   The processor is vulnerable, no microcode extending IBPB
+   functionality to address the vulnerability has been applied.
+
+ * 'Mitigation: microcode':
+
+   Extended IBPB functionality microcode patch has been applied. It does
+   not address User->Kernel and Guest->Host transitions protection but it
+   does address User->User and VM->VM attack vectors.
+
+   Note that User->User mitigation is controlled by how the IBPB aspect in
+   the Spectre v2 mitigation is selected:
+
+    * conditional IBPB:
+
+      where each process can select whether it needs an IBPB issued
+      around it (PR_SPEC_DISABLE/_ENABLE etc.), see :doc:`spectre`
+
+    * strict:
+
+      i.e., always on - by supplying spectre_v2_user=on on the kernel
+      command line
+
+   (spec_rstack_overflow=microcode)
+
+ * 'Mitigation: safe RET':
+
+   Software-only mitigation. It complements the extended IBPB microcode
+   patch functionality by addressing User->Kernel and Guest->Host
+   transitions protection.
+
+   Selected by default or by spec_rstack_overflow=safe-ret
+
+ * 'Mitigation: IBPB':
+
+   Similar protection as "safe RET" above but employs an IBPB barrier on
+   privilege domain crossings (User->Kernel, Guest->Host).
+
+  (spec_rstack_overflow=ibpb)
+
+ * 'Mitigation: IBPB on VMEXIT':
+
+   Mitigation addressing the cloud provider scenario - the Guest->Host
+   transitions only.
+
+   (spec_rstack_overflow=ibpb-vmexit)
+
+
+
+In order to exploit the vulnerability, an attacker needs to:
+
+ - gain local access on the machine
+
+ - break kASLR
+
+ - find gadgets in the running kernel in order to use them in the exploit
+
+ - potentially create and pin an additional workload on the sibling
+   thread, depending on the microarchitecture (not necessary on fam 0x19)
+
+ - run the exploit
+
+Considering the performance implications of each mitigation type, the
+default one is 'Mitigation: safe RET' which should take care of most
+attack vectors, including the local User->Kernel one.
+
+As always, the user is advised to keep her/his system up-to-date by
+applying software updates regularly.
+
+The default setting will be reevaluated when needed and especially when
+new attack vectors appear.
+
+As one can surmise, 'Mitigation: safe RET' does come at the cost of some
+performance depending on the workload. If one trusts her/his userspace
+and does not want to suffer the performance impact, one can always
+disable the mitigation with spec_rstack_overflow=off.
+
+Similarly, 'Mitigation: IBPB' is another full mitigation type employing
+an indirect branch prediction barrier after having applied the required
+microcode patch for one's system. This mitigation comes also at
+a performance cost.
+
+Mitigation: safe RET
+--------------------
+
+The mitigation works by ensuring all RET instructions speculate to
+a controlled location, similar to how speculation is controlled in the
+retpoline sequence.  To accomplish this, the __x86_return_thunk forces
+the CPU to mispredict every function return using a 'safe return'
+sequence.
+
+To ensure the safety of this mitigation, the kernel must ensure that the
+safe return sequence is itself free from attacker interference.  In Zen3
+and Zen4, this is accomplished by creating a BTB alias between the
+untraining function srso_alias_untrain_ret() and the safe return
+function srso_alias_safe_ret() which results in evicting a potentially
+poisoned BTB entry and using that safe one for all function returns.
+
+In older Zen1 and Zen2, this is accomplished using a reinterpretation
+technique similar to the Retbleed one: srso_untrain_ret() and
+srso_safe_ret().
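[Editor's aside, not part of the patch: the "Affected processors" table above corresponds to CPUID family 0x17 (Zen 1/2) and 0x19 (Zen 3/4). A hedged sketch of that family computation in C; a complete check would also verify the "AuthenticAMD" vendor string, and on mitigated kernels one can simply read /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow instead:]

/*
 * Illustrative sketch: derive the CPUID family and compare it against the
 * AMD families listed as affected above (0x17 and 0x19). Not from this patch.
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, family;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	family = (eax >> 8) & 0xf;		/* base family */
	if (family == 0xf)
		family += (eax >> 20) & 0xff;	/* plus extended family */

	printf("CPU family 0x%x: %s\n", family,
	       (family == 0x17 || family == 0x19) ?
	       "listed as affected above" : "not in the affected list");
	return 0;
}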
index a145799..722b6ec 100644 (file)
                        Format: off | on
                        default: on
 
+       gather_data_sampling=
+                       [X86,INTEL] Control the Gather Data Sampling (GDS)
+                       mitigation.
+
+                       Gather Data Sampling is a hardware vulnerability which
+                       allows unprivileged speculative access to data which was
+                       previously stored in vector registers.
+
+                       This issue is mitigated by default in updated microcode.
+                       The mitigation may have a performance impact but can be
+                       disabled. On systems without the microcode mitigation
+                       disabling AVX serves as a mitigation.
+
+                       force:  Disable AVX to mitigate systems without
+                               microcode mitigation. No effect if the microcode
+                               mitigation is present. Known to cause crashes in
+                               userspace with buggy AVX enumeration.
+
+                       off:    Disable GDS mitigation.
+
        gcov_persist=   [GCOV] When non-zero (default), profiling data for
                        kernel modules is saved and remains accessible via
                        debugfs, even when the module is unloaded/reloaded.
                                Disable all optional CPU mitigations.  This
                                improves system performance, but it may also
                                expose users to several CPU vulnerabilities.
-                               Equivalent to: nopti [X86,PPC]
-                                              if nokaslr then kpti=0 [ARM64]
-                                              nospectre_v1 [X86,PPC]
-                                              nobp=0 [S390]
-                                              nospectre_v2 [X86,PPC,S390,ARM64]
-                                              spectre_v2_user=off [X86]
-                                              spec_store_bypass_disable=off [X86,PPC]
-                                              ssbd=force-off [ARM64]
-                                              nospectre_bhb [ARM64]
+                               Equivalent to: if nokaslr then kpti=0 [ARM64]
+                                              gather_data_sampling=off [X86]
+                                              kvm.nx_huge_pages=off [X86]
                                               l1tf=off [X86]
                                               mds=off [X86]
-                                              tsx_async_abort=off [X86]
-                                              kvm.nx_huge_pages=off [X86]
-                                              srbds=off [X86,INTEL]
+                                              mmio_stale_data=off [X86]
                                               no_entry_flush [PPC]
                                               no_uaccess_flush [PPC]
-                                              mmio_stale_data=off [X86]
+                                              nobp=0 [S390]
+                                              nopti [X86,PPC]
+                                              nospectre_bhb [ARM64]
+                                              nospectre_v1 [X86,PPC]
+                                              nospectre_v2 [X86,PPC,S390,ARM64]
                                               retbleed=off [X86]
+                                              spec_store_bypass_disable=off [X86,PPC]
+                                              spectre_v2_user=off [X86]
+                                              srbds=off [X86,INTEL]
+                                              ssbd=force-off [ARM64]
+                                              tsx_async_abort=off [X86]
 
                                Exceptions:
                                               This does not have any effect on
                        Not specifying this option is equivalent to
                        spectre_v2_user=auto.
 
+       spec_rstack_overflow=
+                       [X86] Control RAS overflow mitigation on AMD Zen CPUs
+
+                       off             - Disable mitigation
+                       microcode       - Enable microcode mitigation only
+                       safe-ret        - Enable sw-only safe RET mitigation (default)
+                       ibpb            - Enable mitigation by issuing IBPB on
+                                         kernel entry
+                       ibpb-vmexit     - Issue IBPB only on VMEXIT
+                                         (cloud-specific mitigation)
+
        spec_store_bypass_disable=
                        [HW] Control Speculative Store Bypass (SSB) Disable mitigation
                        (Speculative Store Bypass vulnerability)
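[Editor's aside, not part of the patch: whether either of the new switches documented above was passed at boot can be checked from /proc/cmdline. A minimal sketch, assuming nothing beyond that procfs file:]

/*
 * Illustrative sketch: report whether the boot command line carries the
 * gather_data_sampling= or spec_rstack_overflow= options documented above.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char cmdline[4096] = "";
	FILE *f = fopen("/proc/cmdline", "r");

	if (!f) {
		perror("/proc/cmdline");
		return 1;
	}
	if (!fgets(cmdline, sizeof(cmdline), f)) {
		fclose(f);
		return 1;
	}
	fclose(f);

	printf("gather_data_sampling= present: %s\n",
	       strstr(cmdline, "gather_data_sampling=") ? "yes" : "no");
	printf("spec_rstack_overflow= present: %s\n",
	       strstr(cmdline, "spec_rstack_overflow=") ? "yes" : "no");
	return 0;
}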
index 72d2e91..2594fa1 100644 (file)
@@ -216,7 +216,6 @@ properties:
     description: Whether to enable burnout current for EXT1.
 
   adi,ext1-burnout-current-nanoamp:
-    $ref: /schemas/types.yaml#/definitions/uint32
     description:
       Burnout current in nanoamps to be applied to EXT1.
     enum: [0, 50, 500, 1000, 10000]
@@ -233,7 +232,6 @@ properties:
     description: Whether to enable burnout current for EXT2.
 
   adi,ext2-burnout-current-nanoamp:
-    $ref: /schemas/types.yaml#/definitions/uint32
     description: Burnout current in nanoamps to be applied to EXT2.
     enum: [0, 50, 500, 1000, 10000]
     default: 0
@@ -249,7 +247,6 @@ properties:
     description: Whether to enable burnout current for VIOUT.
 
   adi,viout-burnout-current-nanoamp:
-    $ref: /schemas/types.yaml#/definitions/uint32
     description: Burnout current in nanoamps to be applied to VIOUT.
     enum: [0, 1000, 10000]
     default: 0
index e608a4f..e119a22 100644 (file)
@@ -87,7 +87,7 @@ $defs:
                 emac0_mdc, emac0_mdio, emac0_ptp_aux, emac0_ptp_pps, emac1_mcg0,
                 emac1_mcg1, emac1_mcg2, emac1_mcg3, emac1_mdc, emac1_mdio,
                 emac1_ptp_aux, emac1_ptp_pps, gcc_gp1, gcc_gp2, gcc_gp3,
-                gcc_gp4, gcc_gp5, hs0_mi2s, hs1_mi2s, hs2_mi2s, ibi_i3c,
+                gcc_gp4, gcc_gp5, gpio, hs0_mi2s, hs1_mi2s, hs2_mi2s, ibi_i3c,
                 jitter_bist, mdp0_vsync0, mdp0_vsync1, mdp0_vsync2, mdp0_vsync3,
                 mdp0_vsync4, mdp0_vsync5, mdp0_vsync6, mdp0_vsync7, mdp0_vsync8,
                 mdp1_vsync0, mdp1_vsync1, mdp1_vsync2, mdp1_vsync3, mdp1_vsync4,
index b7d3ae7..41ddc10 100644 (file)
@@ -46,7 +46,7 @@ driver model device node, and its I2C address.
        },
 
        .id_table       = foo_idtable,
-       .probe_new      = foo_probe,
+       .probe          = foo_probe,
        .remove         = foo_remove,
        /* if device autodetection is needed: */
        .class          = I2C_CLASS_SOMETHING,
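As a rough sketch of the driver side of this rename (foo_probe and struct foo_data are placeholder names, matching the surrounding example): the consolidated .probe callback takes only the client, so a minimal probe routine now looks like

	struct foo_data {				/* hypothetical driver state */
		struct i2c_client *client;
	};

	static int foo_probe(struct i2c_client *client)
	{
		struct foo_data *data;

		data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
		if (!data)
			return -ENOMEM;

		data->client = client;
		i2c_set_clientdata(client, data);
		return 0;				/* negative errno on failure */
	}

Only the struct member name changes in the table above; the callback already had this single-argument form under the old .probe_new name.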
index 8b1045c..c383a39 100644 (file)
@@ -178,10 +178,10 @@ nf_conntrack_sctp_timeout_established - INTEGER (seconds)
        Default is set to (hb_interval * path_max_retrans + rto_max)
 
 nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
-       default 0.3
+       default 3
 
 nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds)
-       default 0.3
+       default 3
 
 nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds)
        default 3
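For context (not part of the patch), these entries document defaults for sysctls under net.netfilter; the effective value can be inspected or overridden on a running system, for example with sysctl -w net.netfilter.nf_conntrack_sctp_timeout_shutdown_sent=3.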
index 0f966f0..87e1e0e 100644 (file)
@@ -2339,7 +2339,7 @@ F:        drivers/phy/mediatek/
 ARM/MICROCHIP (ARM64) SoC support
 M:     Conor Dooley <conor@kernel.org>
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 T:     git https://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git
@@ -2348,7 +2348,7 @@ F:        arch/arm64/boot/dts/microchip/
 ARM/Microchip (AT91) SoC support
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
 M:     Alexandre Belloni <alexandre.belloni@bootlin.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 W:     http://www.linux4sam.org
@@ -3250,7 +3250,7 @@ F:        include/uapi/linux/atm*
 
 ATMEL MACB ETHERNET DRIVER
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 S:     Supported
 F:     drivers/net/ethernet/cadence/
 
@@ -8812,6 +8812,7 @@ R:        Michael Walle <michael@walle.cc>
 S:     Maintained
 F:     drivers/gpio/gpio-regmap.c
 F:     include/linux/gpio/regmap.h
+K:     (devm_)?gpio_regmap_(un)?register
 
 GPIO SUBSYSTEM
 M:     Linus Walleij <linus.walleij@linaro.org>
@@ -9375,7 +9376,6 @@ F:        drivers/crypto/hisilicon/sgl.c
 F:     include/linux/hisi_acc_qm.h
 
 HISILICON ROCE DRIVER
-M:     Haoyue Xu <xuhaoyue1@hisilicon.com>
 M:     Junxian Huang <huangjunxian6@hisilicon.com>
 L:     linux-rdma@vger.kernel.org
 S:     Maintained
@@ -11500,6 +11500,8 @@ F:      arch/x86/include/uapi/asm/svm.h
 F:     arch/x86/include/uapi/asm/vmx.h
 F:     arch/x86/kvm/
 F:     arch/x86/kvm/*/
+F:     tools/testing/selftests/kvm/*/x86_64/
+F:     tools/testing/selftests/kvm/x86_64/
 
 KERNFS
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@ -12480,6 +12482,7 @@ F:      net/mctp/
 
 MAPLE TREE
 M:     Liam R. Howlett <Liam.Howlett@oracle.com>
+L:     maple-tree@lists.infradead.org
 L:     linux-mm@kvack.org
 S:     Supported
 F:     Documentation/core-api/maple_tree.rst
@@ -13786,7 +13789,7 @@ F:      Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
 F:     drivers/spi/spi-at91-usart.c
 
 MICROCHIP AUDIO ASOC DRIVERS
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/sound/atmel*
@@ -13809,7 +13812,7 @@ S:      Maintained
 F:     drivers/crypto/atmel-ecc.*
 
 MICROCHIP EIC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/interrupt-controller/microchip,sama7g5-eic.yaml
@@ -13882,7 +13885,7 @@ F:      drivers/video/fbdev/atmel_lcdfb.c
 F:     include/video/atmel_lcdc.h
 
 MICROCHIP MCP16502 PMIC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt
@@ -13909,7 +13912,7 @@ F:      Documentation/devicetree/bindings/mtd/atmel-nand.txt
 F:     drivers/mtd/nand/raw/atmel/*
 
 MICROCHIP OTPC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml
@@ -13948,7 +13951,7 @@ F:      Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
 F:     drivers/fpga/microchip-spi.c
 
 MICROCHIP PWM DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-pwm@vger.kernel.org
 S:     Supported
@@ -13964,7 +13967,7 @@ F:      drivers/iio/adc/at91-sama5d2_adc.c
 F:     include/dt-bindings/iio/adc/at91-sama5d2_adc.h
 
 MICROCHIP SAMA5D2-COMPATIBLE SHUTDOWN CONTROLLER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 S:     Supported
 F:     Documentation/devicetree/bindings/power/reset/atmel,sama5d2-shdwc.yaml
 F:     drivers/power/reset/at91-sama5d2_shdwc.c
@@ -13981,7 +13984,7 @@ S:      Supported
 F:     drivers/spi/spi-atmel.*
 
 MICROCHIP SSC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/misc/atmel-ssc.txt
@@ -14010,7 +14013,7 @@ F:      drivers/usb/gadget/udc/atmel_usba_udc.*
 
 MICROCHIP WILC1000 WIFI DRIVER
 M:     Ajay Singh <ajay.kathat@microchip.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-wireless@vger.kernel.org
 S:     Supported
 F:     drivers/net/wireless/microchip/wilc1000/
@@ -14802,6 +14805,16 @@ F:     net/netfilter/xt_CONNSECMARK.c
 F:     net/netfilter/xt_SECMARK.c
 F:     net/netlabel/
 
+NETWORKING [MACSEC]
+M:     Sabrina Dubroca <sd@queasysnail.net>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     drivers/net/macsec.c
+F:     include/net/macsec.h
+F:     include/uapi/linux/if_macsec.h
+K:     macsec
+K:     \bmdo_
+
 NETWORKING [MPTCP]
 M:     Matthieu Baerts <matthieu.baerts@tessares.net>
 M:     Mat Martineau <martineau@kernel.org>
@@ -16293,6 +16306,7 @@ F:      drivers/pci/controller/dwc/pci-exynos.c
 PCI DRIVER FOR SYNOPSYS DESIGNWARE
 M:     Jingoo Han <jingoohan1@gmail.com>
 M:     Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+M:     Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -18508,17 +18522,14 @@ RTL8180 WIRELESS DRIVER
 L:     linux-wireless@vger.kernel.org
 S:     Orphan
 W:     https://wireless.wiki.kernel.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
 F:     drivers/net/wireless/realtek/rtl818x/rtl8180/
 
 RTL8187 WIRELESS DRIVER
-M:     Herton Ronaldo Krzesinski <herton@canonical.com>
-M:     Hin-Tak Leung <htl10@users.sourceforge.net>
+M:     Hin-Tak Leung <hintak.leung@gmail.com>
 M:     Larry Finger <Larry.Finger@lwfinger.net>
 L:     linux-wireless@vger.kernel.org
 S:     Maintained
 W:     https://wireless.wiki.kernel.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
 F:     drivers/net/wireless/realtek/rtl818x/rtl8187/
 
 RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
@@ -19225,13 +19236,6 @@ F:     Documentation/devicetree/bindings/serial/serial.yaml
 F:     drivers/tty/serdev/
 F:     include/linux/serdev.h
 
-SERIAL DRIVERS
-M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L:     linux-serial@vger.kernel.org
-S:     Maintained
-F:     Documentation/devicetree/bindings/serial/
-F:     drivers/tty/serial/
-
 SERIAL IR RECEIVER
 M:     Sean Young <sean@mess.org>
 L:     linux-media@vger.kernel.org
@@ -21060,6 +21064,39 @@ S:     Maintained
 F:     Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml
 F:     sound/soc/ti/
 
+TEXAS INSTRUMENTS AUDIO (ASoC/HDA) DRIVERS
+M:     Shenghao Ding <shenghao-ding@ti.com>
+M:     Kevin Lu <kevin-lu@ti.com>
+M:     Baojun Xu <x1077012@ti.com>
+L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
+S:     Maintained
+F:     Documentation/devicetree/bindings/sound/tas2552.txt
+F:     Documentation/devicetree/bindings/sound/tas2562.yaml
+F:     Documentation/devicetree/bindings/sound/tas2770.yaml
+F:     Documentation/devicetree/bindings/sound/tas27xx.yaml
+F:     Documentation/devicetree/bindings/sound/ti,pcm1681.txt
+F:     Documentation/devicetree/bindings/sound/ti,pcm3168a.yaml
+F:     Documentation/devicetree/bindings/sound/ti,tlv320*.yaml
+F:     Documentation/devicetree/bindings/sound/tlv320adcx140.yaml
+F:     Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
+F:     Documentation/devicetree/bindings/sound/tpa6130a2.txt
+F:     include/sound/tas2*.h
+F:     include/sound/tlv320*.h
+F:     include/sound/tpa6130a2-plat.h
+F:     sound/pci/hda/tas2781_hda_i2c.c
+F:     sound/soc/codecs/pcm1681.c
+F:     sound/soc/codecs/pcm1789*.*
+F:     sound/soc/codecs/pcm179x*.*
+F:     sound/soc/codecs/pcm186x*.*
+F:     sound/soc/codecs/pcm3008.*
+F:     sound/soc/codecs/pcm3060*.*
+F:     sound/soc/codecs/pcm3168a*.*
+F:     sound/soc/codecs/pcm5102a.c
+F:     sound/soc/codecs/pcm512x*.*
+F:     sound/soc/codecs/tas2*.*
+F:     sound/soc/codecs/tlv320*.*
+F:     sound/soc/codecs/tpa6130a2.*
+
 TEXAS INSTRUMENTS DMA DRIVERS
 M:     Peter Ujfalusi <peter.ujfalusi@gmail.com>
 L:     dmaengine@vger.kernel.org
@@ -21636,20 +21673,16 @@ W:    https://github.com/srcres258/linux-doc
 T:     git git://github.com/srcres258/linux-doc.git doc-zh-tw
 F:     Documentation/translations/zh_TW/
 
-TTY LAYER
+TTY LAYER AND SERIAL DRIVERS
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 M:     Jiri Slaby <jirislaby@kernel.org>
 L:     linux-kernel@vger.kernel.org
 L:     linux-serial@vger.kernel.org
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
+F:     Documentation/devicetree/bindings/serial/
 F:     Documentation/driver-api/serial/
 F:     drivers/tty/
-F:     drivers/tty/serial/serial_base.h
-F:     drivers/tty/serial/serial_base_bus.c
-F:     drivers/tty/serial/serial_core.c
-F:     drivers/tty/serial/serial_ctrl.c
-F:     drivers/tty/serial/serial_port.c
 F:     include/linux/selection.h
 F:     include/linux/serial.h
 F:     include/linux/serial_core.h
@@ -22476,7 +22509,6 @@ L:      virtualization@lists.linux-foundation.org
 S:     Maintained
 F:     drivers/block/virtio_blk.c
 F:     drivers/scsi/virtio_scsi.c
-F:     drivers/vhost/scsi.c
 F:     include/uapi/linux/virtio_blk.h
 F:     include/uapi/linux/virtio_scsi.h
 
@@ -22575,6 +22607,16 @@ F:     include/linux/vhost_iotlb.h
 F:     include/uapi/linux/vhost.h
 F:     kernel/vhost_task.c
 
+VIRTIO HOST (VHOST-SCSI)
+M:     "Michael S. Tsirkin" <mst@redhat.com>
+M:     Jason Wang <jasowang@redhat.com>
+M:     Mike Christie <michael.christie@oracle.com>
+R:     Paolo Bonzini <pbonzini@redhat.com>
+R:     Stefan Hajnoczi <stefanha@redhat.com>
+L:     virtualization@lists.linux-foundation.org
+S:     Maintained
+F:     drivers/vhost/scsi.c
+
 VIRTIO I2C DRIVER
 M:     Conghui Chen <conghui.chen@intel.com>
 M:     Viresh Kumar <viresh.kumar@linaro.org>
index 6bbf9db..2fdd8b4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION =
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
index 714abe4..55bb1c0 100644 (file)
@@ -47,12 +47,6 @@ unsigned long __get_wchan(struct task_struct *p);
 
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
-
-#ifndef CONFIG_SMP
-/* Nothing to prefetch. */
-#define spin_lock_prefetch(lock)       do { } while (0)
-#endif
 
 extern inline void prefetch(const void *ptr)  
 { 
@@ -64,11 +58,4 @@ extern inline void prefetchw(const void *ptr)
        __builtin_prefetch(ptr, 1, 3);
 }
 
-#ifdef CONFIG_SMP
-extern inline void spin_lock_prefetch(const void *ptr)  
-{
-       __builtin_prefetch(ptr, 1, 3);
-}
-#endif
-
 #endif /* __ASM_ALPHA_PROCESSOR_H */
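With spin_lock_prefetch() removed above, callers that want a write-intent prefetch fall back to the remaining prefetchw() helper. A minimal sketch (illustrative only, not from the patch; the function name is made up) of what that builtin call does:

	/* Same GCC builtin the surviving prefetchw() wraps:
	 * rw = 1 requests a prefetch for writing, locality = 3 asks for the
	 * line to be kept in as many cache levels as possible.
	 */
	static inline void warm_line_for_write(const void *p)
	{
		__builtin_prefetch(p, 1, 3);
	}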
index b650ff1..3d74735 100644 (file)
@@ -385,8 +385,7 @@ setup_memory(void *kernel_end)
 #endif /* CONFIG_BLK_DEV_INITRD */
 }
 
-int __init
-page_is_ram(unsigned long pfn)
+int page_is_ram(unsigned long pfn)
 {
        struct memclust_struct * cluster;
        struct memdesc_struct * memdesc;
index 5b52d75..d9927d3 100644 (file)
                valid-mask = <0x003fffff>;
        };
 
-       pci: pciv3@62000000 {
+       pci: pci@62000000 {
                compatible = "arm,integrator-ap-pci", "v3,v360epc-pci";
                device_type = "pci";
                #interrupt-cells = <1>;
index 1a599c2..1ca4d21 100644 (file)
                pinctrl-0 = <&pinctrl_rtc_int>;
                reg = <0x68>;
                interrupt-parent = <&gpio7>;
-               interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
                status = "disabled";
        };
 };
index 3a43086..a05069d 100644 (file)
                                                        reg = <0>;
 
                                                        ldb_from_lcdif1: endpoint {
-                                                               remote-endpoint = <&lcdif1_to_ldb>;
                                                        };
                                                };
 
                                         <&clks IMX6SX_CLK_USDHC1>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-start-tap = <20>;
+                               fsl,tuning-step= <2>;
                                status = "disabled";
                        };
 
                                         <&clks IMX6SX_CLK_USDHC2>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-start-tap = <20>;
+                               fsl,tuning-step= <2>;
                                status = "disabled";
                        };
 
                                         <&clks IMX6SX_CLK_USDHC3>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-start-tap = <20>;
+                               fsl,tuning-step= <2>;
                                status = "disabled";
                        };
 
                                        power-domains = <&pd_disp>;
                                        status = "disabled";
 
-                                       ports {
-                                               port {
-                                                       lcdif1_to_ldb: endpoint {
-                                                               remote-endpoint = <&ldb_from_lcdif1>;
-                                                       };
+                                       port {
+                                               lcdif1_to_ldb: endpoint {
                                                };
                                        };
                                };
index 54026c2..6ffb428 100644 (file)
                                        <&clks IMX7D_USDHC1_ROOT_CLK>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-step = <2>;
+                               fsl,tuning-start-tap = <20>;
                                status = "disabled";
                        };
 
                                        <&clks IMX7D_USDHC2_ROOT_CLK>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-step = <2>;
+                               fsl,tuning-start-tap = <20>;
                                status = "disabled";
                        };
 
                                        <&clks IMX7D_USDHC3_ROOT_CLK>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-step = <2>;
+                               fsl,tuning-start-tap = <20>;
                                status = "disabled";
                        };
 
index b958607..96451c8 100644 (file)
                        /* MDIO */
                        AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLUP | SLEWCTRL_FAST, MUX_MODE0)
                        AM33XX_PADCONF(AM335X_PIN_MDC, PIN_OUTPUT_PULLUP, MUX_MODE0)
+                       /* Added to support GPIO controlled PHY reset */
+                       AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_OUTPUT_PULLUP, MUX_MODE7)
                >;
        };
 
                        /* MDIO reset value */
                        AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLDOWN, MUX_MODE7)
                        AM33XX_PADCONF(AM335X_PIN_MDC, PIN_INPUT_PULLDOWN, MUX_MODE7)
+                       /* Added to support GPIO controlled PHY reset */
+                       AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_INPUT_PULLDOWN, MUX_MODE7)
                >;
        };
 
        baseboard_eeprom: baseboard_eeprom@50 {
                compatible = "atmel,24c256";
                reg = <0x50>;
+               vcc-supply = <&ldo4_reg>;
 
                #address-cells = <1>;
                #size-cells = <1>;
 
        ethphy0: ethernet-phy@0 {
                reg = <0>;
+               /* Support GPIO reset on revision C3 boards */
+               reset-gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+               reset-assert-us = <300>;
+               reset-deassert-us = <6500>;
        };
 };
 
index f3cd04f..72529f5 100644 (file)
@@ -227,6 +227,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
        return false;
 }
 
+static inline void kvm_vcpu_pmu_resync_el0(void) {}
+
 /* PMU Version in DFR Register */
 #define ARMV8_PMU_DFR_VER_NI        0
 #define ARMV8_PMU_DFR_VER_V3P4      0x5
index 8ba450a..61ad965 100644 (file)
@@ -8,8 +8,8 @@
  */
 
 #include <linux/io.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include "common.h"
 
 /* register offsets */
index d6b36f0..1a647d4 100644 (file)
                                compatible = "fsl,imx8mm-mipi-csi2";
                                reg = <0x32e30000 0x1000>;
                                interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
-                               assigned-clocks = <&clk IMX8MM_CLK_CSI1_CORE>,
-                                                 <&clk IMX8MM_CLK_CSI1_PHY_REF>;
-                               assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_1000M>,
-                                                         <&clk IMX8MM_SYS_PLL2_1000M>;
+                               assigned-clocks = <&clk IMX8MM_CLK_CSI1_CORE>;
+                               assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_1000M>;
+
                                clock-frequency = <333000000>;
                                clocks = <&clk IMX8MM_CLK_DISP_APB_ROOT>,
                                         <&clk IMX8MM_CLK_CSI1_ROOT>,
index 9869fe7..aa38dd6 100644 (file)
                                compatible = "fsl,imx8mm-mipi-csi2";
                                reg = <0x32e30000 0x1000>;
                                interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
-                               assigned-clocks = <&clk IMX8MN_CLK_CAMERA_PIXEL>,
-                                                 <&clk IMX8MN_CLK_CSI1_PHY_REF>;
-                               assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_1000M>,
-                                                         <&clk IMX8MN_SYS_PLL2_1000M>;
+                               assigned-clocks = <&clk IMX8MN_CLK_CAMERA_PIXEL>;
+                               assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_1000M>;
                                assigned-clock-rates = <333000000>;
                                clock-frequency = <333000000>;
                                clocks = <&clk IMX8MN_CLK_DISP_APB_ROOT>,
index 8643612..1d8dd14 100644 (file)
 
                        anatop: anatop@44480000 {
                                compatible = "fsl,imx93-anatop", "syscon";
-                               reg = <0x44480000 0x10000>;
+                               reg = <0x44480000 0x2000>;
                        };
 
                        adc1: adc@44530000 {
index 9022ad7..a9e7b83 100644 (file)
                        };
                };
 
-               pm8150l-thermal {
+               pm8150l-pcb-thermal {
                        polling-delay-passive = <0>;
                        polling-delay = <0>;
                        thermal-sensors = <&pm8150l_adc_tm 1>;
index ab767cf..26f5a4e 100644 (file)
 
                vreg_l4c: ldo4 {
                        regulator-name = "vreg_l4c";
-                       regulator-min-microvolt = <1100000>;
-                       regulator-max-microvolt = <1300000>;
+                       regulator-min-microvolt = <1200000>;
+                       regulator-max-microvolt = <1200000>;
                        regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
                        /*
                         * FIXME: This should have regulator-allow-set-load but
index e25dc2b..06df931 100644 (file)
                                reg = <0 0x0ae94400 0 0x200>,
                                      <0 0x0ae94600 0 0x280>,
                                      <0 0x0ae94a00 0 0x1e0>;
-                               reg-names = "dsi0_phy",
-                                           "dsi0_phy_lane",
+                               reg-names = "dsi_phy",
+                                           "dsi_phy_lane",
                                            "dsi_pll";
 
                                #clock-cells = <1>;
index d3ae185..be78a93 100644 (file)
                };
 
                osm_l3: interconnect@18321000 {
-                       compatible = "qcom,sc8180x-osm-l3";
+                       compatible = "qcom,sc8180x-osm-l3", "qcom,osm-l3";
                        reg = <0 0x18321000 0 0x1400>;
 
                        clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
index 18c822a..b46e55b 100644 (file)
@@ -56,7 +56,7 @@
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD0>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
@@ -85,7 +85,7 @@
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD1>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD2>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD3>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD4>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD5>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD6>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 2>;
                        operating-points-v2 = <&cpu7_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD7>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
                        clock-names = "xo", "alternate";
 
-                       #interconnect-cells = <2>;
+                       #interconnect-cells = <1>;
                };
 
                cpufreq_hw: cpufreq@18323000 {
index 83ab6de..1efa07f 100644 (file)
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_0: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_100: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_200: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_300: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_400: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_500: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_600: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 2>;
                        operating-points-v2 = <&cpu7_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_700: l2-cache {
                                compatible = "cache";
                        clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
                        clock-names = "xo", "alternate";
 
-                       #interconnect-cells = <2>;
+                       #interconnect-cells = <1>;
                };
 
                cpufreq_hw: cpufreq@18591000 {
index 88ef478..ec451c6 100644 (file)
                        qcom,controlled-remotely;
                        iommus = <&apps_smmu 0x594 0x0011>,
                                 <&apps_smmu 0x596 0x0011>;
+                       /* FIXME: Probing BAM DMA causes some abort and system hang */
+                       status = "fail";
                };
 
                crypto: crypto@1dfa000 {
                                 <&apps_smmu 0x596 0x0011>;
                        interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>;
                        interconnect-names = "memory";
+                       /* FIXME: dependency BAM DMA is disabled */
+                       status = "disabled";
                };
 
                ipa: ipa@1e40000 {
index 8332c8a..42ce78b 100644 (file)
                        };
                        power-domain@PX30_PD_MMC_NAND {
                                reg = <PX30_PD_MMC_NAND>;
-                               clocks =  <&cru HCLK_NANDC>,
-                                         <&cru HCLK_EMMC>,
-                                         <&cru HCLK_SDIO>,
-                                         <&cru HCLK_SFC>,
-                                         <&cru SCLK_EMMC>,
-                                         <&cru SCLK_NANDC>,
-                                         <&cru SCLK_SDIO>,
-                                         <&cru SCLK_SFC>;
+                               clocks = <&cru HCLK_NANDC>,
+                                        <&cru HCLK_EMMC>,
+                                        <&cru HCLK_SDIO>,
+                                        <&cru HCLK_SFC>,
+                                        <&cru SCLK_EMMC>,
+                                        <&cru SCLK_NANDC>,
+                                        <&cru SCLK_SDIO>,
+                                        <&cru SCLK_SFC>;
                                pm_qos = <&qos_emmc>, <&qos_nand>,
                                         <&qos_sdio>, <&qos_sfc>;
                                #power-domain-cells = <0>;
index 7ea4816..9232357 100644 (file)
                regulator-name = "vdd_core";
                regulator-min-microvolt = <827000>;
                regulator-max-microvolt = <1340000>;
-               regulator-init-microvolt = <1015000>;
                regulator-settling-time-up-us = <250>;
                regulator-always-on;
                regulator-boot-on;
index a71f249..e9810d2 100644 (file)
                regulator-name = "vdd_core";
                regulator-min-microvolt = <827000>;
                regulator-max-microvolt = <1340000>;
-               regulator-init-microvolt = <1015000>;
                regulator-settling-time-up-us = <250>;
                regulator-always-on;
                regulator-boot-on;
index d1f3433..6464ef4 100644 (file)
                compatible = "brcm,bcm4329-fmac";
                reg = <1>;
                interrupt-parent = <&gpio0>;
-               interrupts = <RK_PA3 GPIO_ACTIVE_HIGH>;
+               interrupts = <RK_PA3 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-names = "host-wake";
                pinctrl-names = "default";
                pinctrl-0 = <&wifi_host_wake_l>;
index b6e082f..7c5f441 100644 (file)
                        vcc_sdio: LDO_REG4 {
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <3000000>;
                                regulator-min-microvolt = <1800000>;
                                regulator-max-microvolt = <3300000>;
                                regulator-name = "vcc_sdio";
index 028eb50..8bfd5f8 100644 (file)
 &sdhci {
        max-frequency = <150000000>;
        bus-width = <8>;
-       mmc-hs400-1_8v;
+       mmc-hs200-1_8v;
        non-removable;
-       mmc-hs400-enhanced-strobe;
        status = "okay";
 };
 
index 907071d..980c453 100644 (file)
@@ -45,7 +45,7 @@
        sdio_pwrseq: sdio-pwrseq {
                compatible = "mmc-pwrseq-simple";
                clocks = <&rk808 1>;
-               clock-names = "ext_clock";
+               clock-names = "lpo";
                pinctrl-names = "default";
                pinctrl-0 = <&wifi_enable_h>;
                reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
 };
 
 &sdhci {
+       max-frequency = <150000000>;
        bus-width = <8>;
-       mmc-hs400-1_8v;
-       mmc-hs400-enhanced-strobe;
+       mmc-hs200-1_8v;
        non-removable;
        status = "okay";
 };
index cec3b7b..8a17c1e 100644 (file)
@@ -31,7 +31,7 @@
                compatible = "brcm,bcm4329-fmac";
                reg = <1>;
                interrupt-parent = <&gpio0>;
-               interrupts = <RK_PA3 GPIO_ACTIVE_HIGH>;
+               interrupts = <RK_PA3 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-names = "host-wake";
                pinctrl-names = "default";
                pinctrl-0 = <&wifi_host_wake_l>;
index a2c31d5..8cbf3d9 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_logic";
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_gpu";
                regulator-boot-on;
                regulator-min-microvolt = <712500>;
                regulator-max-microvolt = <1390000>;
-               regulator-init-microvolt = <900000>;
                regulator-name = "vdd_cpu";
                regulator-ramp-delay = <2300>;
                vin-supply = <&vcc_sys>;
index 410cd3e..0c18406 100644 (file)
 
 &gmac1 {
        assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1>;
-       assigned-clock-parents =  <&cru SCLK_GMAC1_RGMII_SPEED>, <&gmac1_clkin>;
+       assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&gmac1_clkin>;
        phy-mode = "rgmii";
        clock_in_out = "input";
        pinctrl-names = "default";
                compatible = "brcm,bcm4329-fmac";
                reg = <1>;
                interrupt-parent = <&gpio2>;
-               interrupts = <RK_PB2 GPIO_ACTIVE_HIGH>;
+               interrupts = <RK_PB2 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-names = "host-wake";
                pinctrl-names = "default";
                pinctrl-0 = <&wifi_host_wake_h>;
index ff936b7..1c6d83b 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
index 8d61f82..d899087 100644 (file)
                                regulator-always-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-name = "vdd_gpu_npu";
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
index 25a8c78..854d02b 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_logic";
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_gpu";
index b276eb0..2d92713 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
 
                                regulator-state-mem {
                                regulator-boot-on;
                                regulator-min-microvolt = <900000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
 
                                regulator-state-mem {
index 5e4236a..1b1c67d 100644 (file)
 
 &mdio1 {
        rgmii_phy1: ethernet-phy@0 {
-               compatible="ethernet-phy-ieee802.3-c22";
-               reg= <0x0>;
+               compatible = "ethernet-phy-ieee802.3-c22";
+               reg = <0x0>;
        };
 };
 
index 42889c5..938092f 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
 
                                regulator-state-mem {
                                regulator-name = "vdd_gpu";
                                regulator-min-microvolt = <900000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
 
                                regulator-state-mem {
index 31aa2b8..63bae36 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-state-mem {
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                        regulator-state-mem {
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_npu";
                                regulator-state-mem {
index ff0bf24..f9127dd 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index 6747925..19f8fc3 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index 25e2056..89e84e3 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vcca1v8_image: LDO_REG9 {
                                regulator-name = "vcca1v8_image";
-                               regulator-init-microvolt = <950000>;
                                regulator-min-microvolt = <950000>;
                                regulator-max-microvolt = <1800000>;
 
index e653b06..a8a4cc1 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
index 58ba328..93189f8 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index 59ecf86..a337f54 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index c50fbdd..45b03dc 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index 917f5b2..e05ab11 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index afda976..5153703 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <550000>;
                                regulator-max-microvolt = <950000>;
-                               regulator-init-microvolt = <750000>;
                                regulator-ramp-delay = <12500>;
                                regulator-name = "vdd_vdenc_s0";
 
index 4d9ed2a..1a60a27 100644 (file)
        cpu-supply = <&vdd_cpu_lit_s0>;
 };
 
-&cpu_b0{
+&cpu_b0 {
        cpu-supply = <&vdd_cpu_big0_s0>;
 };
 
-&cpu_b1{
+&cpu_b1 {
        cpu-supply = <&vdd_cpu_big0_s0>;
 };
 
-&cpu_b2{
+&cpu_b2 {
        cpu-supply = <&vdd_cpu_big1_s0>;
 };
 
-&cpu_b3{
+&cpu_b3 {
        cpu-supply = <&vdd_cpu_big1_s0>;
 };
 
index 8e5ffb5..b7afaa0 100644 (file)
 .Lskip_hcrx_\@:
 .endm
 
+/* Check whether we are running in the host at EL2, i.e. (h)VHE. Jump to \fail if not. */
+.macro __check_hvhe fail, tmp
+       mrs     \tmp, hcr_el2
+       and     \tmp, \tmp, #HCR_E2H
+       cbz     \tmp, \fail
+.endm
+
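For reference, a rough C analogue of the test performed by the new __check_hvhe macro (illustrative only, not part of the patch): the macro simply checks whether HCR_EL2.E2H is set, which is how (h)VHE is detected this early in boot.

    /* Hypothetical helper; the asm macro performs the same test via \tmp. */
    static inline bool running_hvhe(void)
    {
            return read_sysreg(hcr_el2) & HCR_E2H;
    }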
 /*
  * Allow Non-secure EL1 and EL0 to access physical timer and counter.
  * This is not necessary for VHE, since the host kernel runs in EL2,
@@ -43,9 +50,7 @@
  */
 .macro __init_el2_timers
        mov     x0, #3                          // Enable EL1 physical timers
-       mrs     x1, hcr_el2
-       and     x1, x1, #HCR_E2H
-       cbz     x1, .LnVHE_\@
+       __check_hvhe .LnVHE_\@, x1
        lsl     x0, x0, #10
 .LnVHE_\@:
        msr     cnthctl_el2, x0
 
 /* Coprocessor traps */
 .macro __init_el2_cptr
-       mrs     x1, hcr_el2
-       and     x1, x1, #HCR_E2H
-       cbz     x1, .LnVHE_\@
+       __check_hvhe .LnVHE_\@, x1
        mov     x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
-       b       .Lset_cptr_\@
+       msr     cpacr_el1, x0
+       b       .Lskip_set_cptr_\@
 .LnVHE_\@:
        mov     x0, #0x33ff
-.Lset_cptr_\@:
        msr     cptr_el2, x0                    // Disable copro. traps to EL2
+.Lskip_set_cptr_\@:
 .endm
 
 /* Disable any fine grained traps */
        check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
 
 .Linit_sve_\@: /* SVE register access */
-       mrs     x0, cptr_el2                    // Disable SVE traps
-       mrs     x1, hcr_el2
-       and     x1, x1, #HCR_E2H
-       cbz     x1, .Lcptr_nvhe_\@
+       __check_hvhe .Lcptr_nvhe_\@, x1
 
-       // VHE case
+       // (h)VHE case
+       mrs     x0, cpacr_el1                   // Disable SVE traps
        orr     x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
-       b       .Lset_cptr_\@
+       msr     cpacr_el1, x0
+       b       .Lskip_set_cptr_\@
 
 .Lcptr_nvhe_\@: // nVHE case
+       mrs     x0, cptr_el2                    // Disable SVE traps
        bic     x0, x0, #CPTR_EL2_TZ
-.Lset_cptr_\@:
        msr     cptr_el2, x0
+.Lskip_set_cptr_\@:
        isb
        mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
        msr_s   SYS_ZCR_EL2, x1                 // length for EL1.
        check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2
 
 .Linit_sme_\@: /* SME register access and priority mapping */
+       __check_hvhe .Lcptr_nvhe_sme_\@, x1
+
+       // (h)VHE case
+       mrs     x0, cpacr_el1                   // Disable SME traps
+       orr     x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN)
+       msr     cpacr_el1, x0
+       b       .Lskip_set_cptr_sme_\@
+
+.Lcptr_nvhe_sme_\@: // nVHE case
        mrs     x0, cptr_el2                    // Disable SME traps
        bic     x0, x0, #CPTR_EL2_TSM
        msr     cptr_el2, x0
+.Lskip_set_cptr_sme_\@:
        isb
 
        mrs     x1, sctlr_el2
index 67f2fb7..8df46f1 100644 (file)
@@ -356,7 +356,7 @@ static inline int sme_max_virtualisable_vl(void)
        return vec_max_virtualisable_vl(ARM64_VEC_SME);
 }
 
-extern void sme_alloc(struct task_struct *task);
+extern void sme_alloc(struct task_struct *task, bool flush);
 extern unsigned int sme_get_vl(void);
 extern int sme_set_current_vl(unsigned long arg);
 extern int sme_get_current_vl(void);
@@ -388,7 +388,7 @@ static inline void sme_smstart_sm(void) { }
 static inline void sme_smstop_sm(void) { }
 static inline void sme_smstop(void) { }
 
-static inline void sme_alloc(struct task_struct *task) { }
+static inline void sme_alloc(struct task_struct *task, bool flush) { }
 static inline void sme_setup(void) { }
 static inline unsigned int sme_get_vl(void) { return 0; }
 static inline int sme_max_vl(void) { return 0; }
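The new 'flush' argument separates first-use allocation, where any existing ZA/ZT contents must be zeroed, from paths that have to preserve the current SME state. A sketch of the two call patterns, matching the callers updated later in this series:

    sme_alloc(task, true);  /* e.g. SME access trap or ZA regset write: allocate and zero */
    sme_alloc(task, false); /* e.g. ptrace entering streaming mode: keep existing ZA/ZT   */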
index 58e5eb2..5882b24 100644 (file)
 #define HCR_DCT                (UL(1) << 57)
 #define HCR_ATA_SHIFT  56
 #define HCR_ATA                (UL(1) << HCR_ATA_SHIFT)
+#define HCR_TTLBOS     (UL(1) << 55)
+#define HCR_TTLBIS     (UL(1) << 54)
+#define HCR_ENSCXT     (UL(1) << 53)
+#define HCR_TOCU       (UL(1) << 52)
 #define HCR_AMVOFFEN   (UL(1) << 51)
+#define HCR_TICAB      (UL(1) << 50)
 #define HCR_TID4       (UL(1) << 49)
 #define HCR_FIEN       (UL(1) << 47)
 #define HCR_FWB                (UL(1) << 46)
+#define HCR_NV2                (UL(1) << 45)
+#define HCR_AT         (UL(1) << 44)
+#define HCR_NV1                (UL(1) << 43)
+#define HCR_NV         (UL(1) << 42)
 #define HCR_API                (UL(1) << 41)
 #define HCR_APK                (UL(1) << 40)
 #define HCR_TEA                (UL(1) << 37)
@@ -89,7 +98,6 @@
                         HCR_BSU_IS | HCR_FB | HCR_TACR | \
                         HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
                         HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
-#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
 #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
                                 BIT(18) |              \
                                 GENMASK(16, 15))
 
+/*
+ * FGT register definitions
+ *
+ * RES0 and polarity masks as of DDI0487J.a, to be updated as needed.
+ * We're not using the generated masks as they are usually ahead of
+ * the published ARM ARM, which we use as a reference.
+ *
+ * Once we get to a point where the two describe the same thing, we'll
+ * merge the definitions. One day.
+ */
+#define __HFGRTR_EL2_RES0      (GENMASK(63, 56) | GENMASK(53, 51))
+#define __HFGRTR_EL2_MASK      GENMASK(49, 0)
+#define __HFGRTR_EL2_nMASK     (GENMASK(55, 54) | BIT(50))
+
+#define __HFGWTR_EL2_RES0      (GENMASK(63, 56) | GENMASK(53, 51) |    \
+                                BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
+                                GENMASK(26, 25) | BIT(21) | BIT(18) |  \
+                                GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
+#define __HFGWTR_EL2_MASK      GENMASK(49, 0)
+#define __HFGWTR_EL2_nMASK     (GENMASK(55, 54) | BIT(50))
+
+#define __HFGITR_EL2_RES0      GENMASK(63, 57)
+#define __HFGITR_EL2_MASK      GENMASK(54, 0)
+#define __HFGITR_EL2_nMASK     GENMASK(56, 55)
+
+#define __HDFGRTR_EL2_RES0     (BIT(49) | BIT(42) | GENMASK(39, 38) |  \
+                                GENMASK(21, 20) | BIT(8))
+#define __HDFGRTR_EL2_MASK     ~__HDFGRTR_EL2_nMASK
+#define __HDFGRTR_EL2_nMASK    GENMASK(62, 59)
+
+#define __HDFGWTR_EL2_RES0     (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
+                                BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
+                                BIT(22) | BIT(9) | BIT(6))
+#define __HDFGWTR_EL2_MASK     ~__HDFGWTR_EL2_nMASK
+#define __HDFGWTR_EL2_nMASK    GENMASK(62, 60)
+
+/* Similar definitions for HCRX_EL2 */
+#define __HCRX_EL2_RES0                (GENMASK(63, 16) | GENMASK(13, 12))
+#define __HCRX_EL2_MASK                (0)
+#define __HCRX_EL2_nMASK       (GENMASK(15, 14) | GENMASK(4, 0))
+
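These masks follow one convention: __*_MASK bits are positive-polarity (set means trap), __*_nMASK bits are negative-polarity (clear means trap), and __*_RES0 bits must never be exposed. A minimal sketch of how the HFGRTR_EL2 masks could be used to clamp a guest-written value, assuming a hypothetical helper (the real trap-routing code comes later in the series); any access to the registers themselves is gated on the ARM64_HAS_FGT capability added further down, typically via cpus_have_final_cap(ARM64_HAS_FGT):

    /* Illustrative only: restrict a guest value to the known bit layout. */
    static inline u64 sanitise_hfgrtr(u64 val)
    {
            val &= ~__HFGRTR_EL2_RES0;                      /* RES0 bits stay zero */
            val &= __HFGRTR_EL2_MASK | __HFGRTR_EL2_nMASK;  /* known bits only     */
            return val;
    }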
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK     (~UL(0xf))
 /*
index 7d170aa..24b5e6b 100644 (file)
@@ -70,6 +70,7 @@ enum __kvm_host_smccc_func {
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+       __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
        __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
        __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
        __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
@@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
                                         phys_addr_t ipa,
                                         int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                                       phys_addr_t start, unsigned long pages);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
@@ -278,7 +281,7 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void);
 asmlinkage void kvm_unexpected_el2_exception(void);
 struct kvm_cpu_context;
 void handle_trap(struct kvm_cpu_context *host_ctxt);
-asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on);
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
 void __noreturn __pkvm_init_finalise(void);
 void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
 void kvm_patch_vector_branch(struct alt_instr *alt,
index efc0b45..3d6725f 100644 (file)
@@ -571,6 +571,14 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
        return test_bit(feature, vcpu->arch.features);
 }
 
+static __always_inline void kvm_write_cptr_el2(u64 val)
+{
+       if (has_vhe() || has_hvhe())
+               write_sysreg(val, cpacr_el1);
+       else
+               write_sysreg(val, cptr_el2);
+}
+
 static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
 {
        u64 val;
@@ -578,8 +586,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
        if (has_vhe()) {
                val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
                       CPACR_EL1_ZEN_EL1EN);
+               if (cpus_have_final_cap(ARM64_SME))
+                       val |= CPACR_EL1_SMEN_EL1EN;
        } else if (has_hvhe()) {
                val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+
+               if (!vcpu_has_sve(vcpu) ||
+                   (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED))
+                       val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
+               if (cpus_have_final_cap(ARM64_SME))
+                       val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
        } else {
                val = CPTR_NVHE_EL2_RES1;
 
@@ -597,9 +613,6 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
 {
        u64 val = kvm_get_reset_cptr_el2(vcpu);
 
-       if (has_vhe() || has_hvhe())
-               write_sysreg(val, cpacr_el1);
-       else
-               write_sysreg(val, cptr_el2);
+       kvm_write_cptr_el2(val);
 }
 #endif /* __ARM64_KVM_EMULATE_H__ */
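With the write path factored out, callers can program the trap register without open-coding the VHE/hVHE/nVHE split. A minimal sketch (illustrative; the guest-entry code adjusts the value further based on FP/SVE/SME ownership):

    u64 cptr = kvm_get_reset_cptr_el2(vcpu);
    /* ...tweak cptr for the guest's FP/SVE/SME state as needed... */
    kvm_write_cptr_el2(cptr);   /* lands in CPACR_EL1 or CPTR_EL2 */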
index d3dd05b..af06ccb 100644 (file)
@@ -49,6 +49,7 @@
 #define KVM_REQ_RELOAD_GICv4   KVM_ARCH_REQ(4)
 #define KVM_REQ_RELOAD_PMU     KVM_ARCH_REQ(5)
 #define KVM_REQ_SUSPEND                KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
                                     KVM_DIRTY_LOG_INITIALLY_SET)
@@ -380,6 +381,7 @@ enum vcpu_sysreg {
        CPTR_EL2,       /* Architectural Feature Trap Register (EL2) */
        HSTR_EL2,       /* Hypervisor System Trap Register */
        HACR_EL2,       /* Hypervisor Auxiliary Control Register */
+       HCRX_EL2,       /* Extended Hypervisor Configuration Register */
        TTBR0_EL2,      /* Translation Table Base Register 0 (EL2) */
        TTBR1_EL2,      /* Translation Table Base Register 1 (EL2) */
        TCR_EL2,        /* Translation Control Register (EL2) */
@@ -400,6 +402,11 @@ enum vcpu_sysreg {
        TPIDR_EL2,      /* EL2 Software Thread ID Register */
        CNTHCTL_EL2,    /* Counter-timer Hypervisor Control register */
        SP_EL2,         /* EL2 Stack Pointer */
+       HFGRTR_EL2,
+       HFGWTR_EL2,
+       HFGITR_EL2,
+       HDFGRTR_EL2,
+       HDFGWTR_EL2,
        CNTHP_CTL_EL2,
        CNTHP_CVAL_EL2,
        CNTHV_CTL_EL2,
@@ -567,8 +574,7 @@ struct kvm_vcpu_arch {
        /* Cache some mmu pages needed inside spinlock regions */
        struct kvm_mmu_memory_cache mmu_page_cache;
 
-       /* Target CPU and feature flags */
-       int target;
+       /* feature flags */
        DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
 
        /* Virtual SError ESR to restore when HCR_EL2.VSE is set */
@@ -669,6 +675,8 @@ struct kvm_vcpu_arch {
 #define VCPU_SVE_FINALIZED     __vcpu_single_flag(cflags, BIT(1))
 /* PTRAUTH exposed to guest */
 #define GUEST_HAS_PTRAUTH      __vcpu_single_flag(cflags, BIT(2))
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED       __vcpu_single_flag(cflags, BIT(3))
 
 /* Exception pending */
 #define PENDING_EXCEPTION      __vcpu_single_flag(iflags, BIT(0))
@@ -899,7 +907,6 @@ struct kvm_vcpu_stat {
        u64 exits;
 };
 
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
@@ -967,8 +974,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
 #define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
 #endif /* __KVM_NVHE_HYPERVISOR__ */
 
-void force_vm_exit(const cpumask_t *mask);
-
 int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
 void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
 
@@ -983,6 +988,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
 void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
 
 int __init kvm_sys_reg_table_init(void);
+int __init populate_nv_trap_config(void);
 
 bool lock_all_vcpus(struct kvm *kvm);
 void unlock_all_vcpus(struct kvm *kvm);
@@ -1049,8 +1055,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
        return cpus_have_const_cap(ARM64_SPECTRE_V3A);
 }
 
-void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
-
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 
@@ -1113,13 +1117,15 @@ int __init kvm_set_ipa_limit(void);
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 static inline bool kvm_vm_is_protected(struct kvm *kvm)
 {
        return false;
 }
 
-void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
-
 int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
 bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 
index 0e1e1ab..96a80e8 100644 (file)
@@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
                           void __iomem **haddr);
 int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
                             void **haddr);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
 void __init free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
index 8fb67f0..fa23cc9 100644 (file)
@@ -11,6 +11,8 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
                test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
 }
 
+extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
+
 struct sys_reg_params;
 struct sys_reg_desc;
 
index 929d355..d3e354b 100644 (file)
@@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
  *        kvm_pgtable_prot format.
  */
 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu:       Stage-2 KVM MMU struct
+ * @addr:      The base intermediate physical address (IPA) from which to invalidate
+ * @size:      Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t addr, size_t size);
 #endif /* __ARM64_KVM_PGTABLE_H__ */
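A hedged usage sketch of the new helper, assuming the caller already knows the IPA range to drop; internally the range is expected to be split into hypercall-sized chunks and forwarded to __kvm_tlb_flush_vmid_range:

    /* Illustrative only: invalidate stage-2 TLB entries for [addr, addr + size). */
    kvm_tlb_flush_vmid_range(&kvm->arch.mmu, addr, size);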
index 3918f2a..e5bc545 100644 (file)
@@ -359,14 +359,6 @@ static inline void prefetchw(const void *ptr)
        asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr));
 }
 
-#define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *ptr)
-{
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-                    "prfm pstl1strm, %a0",
-                    "nop") : : "p" (ptr));
-}
-
 extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
 extern void __init minsigstksz_setup(void);
 
index b481935..818c111 100644 (file)
 #define SYS_DC_CIGSW                   sys_insn(1, 0, 7, 14, 4)
 #define SYS_DC_CIGDSW                  sys_insn(1, 0, 7, 14, 6)
 
+#define SYS_IC_IALLUIS                 sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU                   sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU                    sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC                    sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC                   sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC                  sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC                    sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC                   sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC                  sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU                    sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP                    sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP                   sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP                  sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP                   sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP                  sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP                 sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC                   sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC                  sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC                 sys_insn(1, 3, 7, 14, 5)
+
+/* Data cache zero operations */
+#define SYS_DC_ZVA                     sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA                     sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA                    sys_insn(1, 3, 7, 4, 4)
+
 /*
  * Automatically generated definitions for system registers, the
  * manual encodings below are in the process of being converted to
 #define SYS_DBGDTRTX_EL0               sys_reg(2, 3, 0, 5, 0)
 #define SYS_DBGVCR32_EL2               sys_reg(2, 4, 0, 7, 0)
 
+#define SYS_BRBINF_EL1(n)              sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBINFINJ_EL1              sys_reg(2, 1, 9, 1, 0)
+#define SYS_BRBSRC_EL1(n)              sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBSRCINJ_EL1              sys_reg(2, 1, 9, 1, 1)
+#define SYS_BRBTGT_EL1(n)              sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+#define SYS_BRBTGTINJ_EL1              sys_reg(2, 1, 9, 1, 2)
+#define SYS_BRBTS_EL1                  sys_reg(2, 1, 9, 0, 2)
+
+#define SYS_BRBCR_EL1                  sys_reg(2, 1, 9, 0, 0)
+#define SYS_BRBFCR_EL1                 sys_reg(2, 1, 9, 0, 1)
+#define SYS_BRBIDR0_EL1                        sys_reg(2, 1, 9, 2, 0)
+
+#define SYS_TRCITECR_EL1               sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m)                        sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m)                 sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS              sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR                 sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR                  sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR                  sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0               sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1               sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m)               sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR                        sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET                        sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m)              sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m)             sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m)                        sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR                 sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH                 sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID                   sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R              sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R              sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m)            sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0                    sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10                   sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11                   sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12                   sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13                   sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1                    sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2                    sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3                    sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4                    sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5                    sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6                    sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7                    sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8                    sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9                    sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m)               sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR                 sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR                   sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR                 sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR                   sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m)               sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR                     sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m)               sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR               sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR                  sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m)                        sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m)                        sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m)              sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR               sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR                   sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR                  sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR                        sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR                  sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR                  sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR                        sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR              sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR                        sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0              sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1              sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m)              sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR                   sys_reg(2, 1, 1, 0, 4)
+
 #define SYS_MIDR_EL1                   sys_reg(3, 0, 0, 0, 0)
 #define SYS_MPIDR_EL1                  sys_reg(3, 0, 0, 0, 5)
 #define SYS_REVIDR_EL1                 sys_reg(3, 0, 0, 0, 6)
 #define SYS_ERXCTLR_EL1                        sys_reg(3, 0, 5, 4, 1)
 #define SYS_ERXSTATUS_EL1              sys_reg(3, 0, 5, 4, 2)
 #define SYS_ERXADDR_EL1                        sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1                        sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1              sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1              sys_reg(3, 0, 5, 4, 6)
 #define SYS_ERXMISC0_EL1               sys_reg(3, 0, 5, 5, 0)
 #define SYS_ERXMISC1_EL1               sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1               sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1               sys_reg(3, 0, 5, 5, 3)
 #define SYS_TFSR_EL1                   sys_reg(3, 0, 5, 6, 0)
 #define SYS_TFSRE0_EL1                 sys_reg(3, 0, 5, 6, 1)
 
 #define SYS_ICC_IGRPEN0_EL1            sys_reg(3, 0, 12, 12, 6)
 #define SYS_ICC_IGRPEN1_EL1            sys_reg(3, 0, 12, 12, 7)
 
+#define SYS_ACCDATA_EL1                        sys_reg(3, 0, 13, 0, 5)
+
 #define SYS_CNTKCTL_EL1                        sys_reg(3, 0, 14, 1, 0)
 
 #define SYS_AIDR_EL1                   sys_reg(3, 1, 0, 0, 7)
 #define SYS_VTCR_EL2                   sys_reg(3, 4, 2, 1, 2)
 
 #define SYS_TRFCR_EL2                  sys_reg(3, 4, 1, 2, 1)
-#define SYS_HDFGRTR_EL2                        sys_reg(3, 4, 3, 1, 4)
-#define SYS_HDFGWTR_EL2                        sys_reg(3, 4, 3, 1, 5)
 #define SYS_HAFGRTR_EL2                        sys_reg(3, 4, 3, 1, 6)
 #define SYS_SPSR_EL2                   sys_reg(3, 4, 4, 0, 0)
 #define SYS_ELR_EL2                    sys_reg(3, 4, 4, 0, 1)
 
 #define SYS_SP_EL2                     sys_reg(3, 6,  4, 1, 0)
 
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R    sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W    sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R    sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W    sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP   sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP   sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E2R    sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W    sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R   sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W   sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R   sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W   sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+
+/* TLBI instructions */
+#define OP_TLBI_VMALLE1OS              sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS                 sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS               sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS                        sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS                        sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS               sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS                        sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS               sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS               sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS              sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS              sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS                 sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS               sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS                        sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS                        sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS               sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS                        sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS               sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS               sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS              sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1                  sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1                 sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1                 sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1                        sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1                        sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1                   sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1                 sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1                  sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1                  sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1                 sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS           sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS              sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS            sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS             sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS             sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS            sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS             sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS            sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS            sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS           sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS           sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS              sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS            sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS             sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS             sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS            sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS             sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS            sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS            sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS           sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS               sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS              sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS              sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS             sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS             sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS                        sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS              sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS               sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS               sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS              sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS              sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS             sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS             sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS            sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS                        sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS                 sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS                        sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS                        sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS           sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS                        sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS               sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS                        sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS                 sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS                        sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS                        sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS           sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS              sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1                        sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1               sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS             sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS             sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1               sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1              sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS            sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS                        sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS               sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2                  sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2                 sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2                  sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2                   sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1                  sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2                  sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1             sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS           sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS          sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS          sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS         sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS             sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS              sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS             sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS             sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS                sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS             sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS            sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS             sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS              sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS             sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS             sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS                sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS           sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS             sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS            sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS          sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS          sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS            sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS           sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS         sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS             sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS            sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS               sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS              sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS               sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS                        sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS               sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS               sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS          sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_BRB_IALL                    sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ                     sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX                    sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX                    sys_insn(1, 3, 7, 3, 5)
+#define OP_CPP_RCTX                    sys_insn(1, 3, 7, 3, 7)
+
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_ENTP2        (BIT(60))
 #define SCTLR_ELx_DSSBS        (BIT(44))
index 412a3b9..93f4b39 100644 (file)
@@ -278,14 +278,77 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  */
 #define MAX_TLBI_OPS   PTRS_PER_PTE
 
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op:        TLBI instruction that operates on a range (has 'r' prefix)
+ * @start:     The start address of the range
+ * @pages:     Range as the number of pages from 'start'
+ * @stride:    Flush granularity
+ * @asid:      The ASID of the task (0 for IPA instructions)
+ * @tlb_level: Translation Table level hint, if known
+ * @tlbi_user: If 'true', call an additional __tlbi_user()
+ *              (typically for user ASIDs). 'false' for IPA instructions
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ *    operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ *    by 'scale', so multiple range TLBI operations may be required.
+ *    Start from scale = 0, flush the corresponding number of pages
+ *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ *    until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride,                 \
+                               asid, tlb_level, tlbi_user)             \
+do {                                                                   \
+       int num = 0;                                                    \
+       int scale = 0;                                                  \
+       unsigned long addr;                                             \
+                                                                       \
+       while (pages > 0) {                                             \
+               if (!system_supports_tlb_range() ||                     \
+                   pages % 2 == 1) {                                   \
+                       addr = __TLBI_VADDR(start, asid);               \
+                       __tlbi_level(op, addr, tlb_level);              \
+                       if (tlbi_user)                                  \
+                               __tlbi_user_level(op, addr, tlb_level); \
+                       start += stride;                                \
+                       pages -= stride >> PAGE_SHIFT;                  \
+                       continue;                                       \
+               }                                                       \
+                                                                       \
+               num = __TLBI_RANGE_NUM(pages, scale);                   \
+               if (num >= 0) {                                         \
+                       addr = __TLBI_VADDR_RANGE(start, asid, scale,   \
+                                                 num, tlb_level);      \
+                       __tlbi(r##op, addr);                            \
+                       if (tlbi_user)                                  \
+                               __tlbi_user(r##op, addr);               \
+                       start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+                       pages -= __TLBI_RANGE_PAGES(num, scale);        \
+               }                                                       \
+               scale++;                                                \
+       }                                                               \
+} while (0)
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
+       __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+
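Two worked examples of the loop above, assuming TLB range operations are supported: for pages = 9, the count is odd, so one page is flushed through the non-range path first, and the remaining 8 pages are covered at scale = 0 with num = 3 ((3+1)*2^1 = 8). For pages = 64, the count is even but would need num = 31 at scale = 0, beyond the macro's limit of 30, so __TLBI_RANGE_NUM() goes negative and the loop retries at scale = 1, where num = 0 covers (0+1)*2^6 = 64 pages in a single range TLBI; this is the "num = 0 and scale + 1" preference called out in the note.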
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
                                     unsigned long start, unsigned long end,
                                     unsigned long stride, bool last_level,
                                     int tlb_level)
 {
-       int num = 0;
-       int scale = 0;
-       unsigned long asid, addr, pages;
+       unsigned long asid, pages;
 
        start = round_down(start, stride);
        end = round_up(end, stride);
@@ -307,56 +370,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
        dsb(ishst);
        asid = ASID(vma->vm_mm);
 
-       /*
-        * When the CPU does not support TLB range operations, flush the TLB
-        * entries one by one at the granularity of 'stride'. If the TLB
-        * range ops are supported, then:
-        *
-        * 1. If 'pages' is odd, flush the first page through non-range
-        *    operations;
-        *
-        * 2. For remaining pages: the minimum range granularity is decided
-        *    by 'scale', so multiple range TLBI operations may be required.
-        *    Start from scale = 0, flush the corresponding number of pages
-        *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
-        *    until no pages left.
-        *
-        * Note that certain ranges can be represented by either num = 31 and
-        * scale or num = 0 and scale + 1. The loop below favours the latter
-        * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
-        */
-       while (pages > 0) {
-               if (!system_supports_tlb_range() ||
-                   pages % 2 == 1) {
-                       addr = __TLBI_VADDR(start, asid);
-                       if (last_level) {
-                               __tlbi_level(vale1is, addr, tlb_level);
-                               __tlbi_user_level(vale1is, addr, tlb_level);
-                       } else {
-                               __tlbi_level(vae1is, addr, tlb_level);
-                               __tlbi_user_level(vae1is, addr, tlb_level);
-                       }
-                       start += stride;
-                       pages -= stride >> PAGE_SHIFT;
-                       continue;
-               }
-
-               num = __TLBI_RANGE_NUM(pages, scale);
-               if (num >= 0) {
-                       addr = __TLBI_VADDR_RANGE(start, asid, scale,
-                                                 num, tlb_level);
-                       if (last_level) {
-                               __tlbi(rvale1is, addr);
-                               __tlbi_user(rvale1is, addr);
-                       } else {
-                               __tlbi(rvae1is, addr);
-                               __tlbi_user(rvae1is, addr);
-                       }
-                       start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
-                       pages -= __TLBI_RANGE_PAGES(num, scale);
-               }
-               scale++;
-       }
+       if (last_level)
+               __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+       else
+               __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+
        dsb(ish);
 }
 
diff --git a/arch/arm64/include/uapi/asm/bitsperlong.h b/arch/arm64/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..485d60b
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITSPERLONG_H
+#define __ASM_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_BITSPERLONG_H */
index f9d456f..668e287 100644 (file)
@@ -2627,6 +2627,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP)
        },
+       {
+               .desc = "Fine Grained Traps",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_HAS_FGT,
+               .matches = has_cpuid_feature,
+               ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
+       },
 #ifdef CONFIG_ARM64_SME
        {
                .desc = "Scalable Matrix Extension",
index 75c37b1..087c05a 100644 (file)
@@ -1285,9 +1285,9 @@ void fpsimd_release_task(struct task_struct *dead_task)
  * the interest of testability and predictability, the architecture
  * guarantees that when ZA is enabled it will be zeroed.
  */
-void sme_alloc(struct task_struct *task)
+void sme_alloc(struct task_struct *task, bool flush)
 {
-       if (task->thread.sme_state) {
+       if (task->thread.sme_state && flush) {
                memset(task->thread.sme_state, 0, sme_state_size(task));
                return;
        }
@@ -1515,7 +1515,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
        }
 
        sve_alloc(current, false);
-       sme_alloc(current);
+       sme_alloc(current, true);
        if (!current->thread.sve_state || !current->thread.sme_state) {
                force_sig(SIGKILL);
                return;
index 5b9b430..187aa2b 100644 (file)
@@ -881,6 +881,13 @@ static int sve_set_common(struct task_struct *target,
                        break;
                case ARM64_VEC_SME:
                        target->thread.svcr |= SVCR_SM_MASK;
+
+                       /*
+                        * Disable traps and ensure there is SME storage but
+                        * preserve any currently set values in ZA/ZT.
+                        */
+                       sme_alloc(target, false);
+                       set_tsk_thread_flag(target, TIF_SME);
                        break;
                default:
                        WARN_ON_ONCE(1);
@@ -1100,7 +1107,7 @@ static int za_set(struct task_struct *target,
        }
 
        /* Allocate/reinit ZA storage */
-       sme_alloc(target);
+       sme_alloc(target, true);
        if (!target->thread.sme_state) {
                ret = -ENOMEM;
                goto out;
@@ -1170,8 +1177,13 @@ static int zt_set(struct task_struct *target,
        if (!system_supports_sme2())
                return -EINVAL;
 
+       /* Ensure SVE storage in case this is first use of SME */
+       sve_alloc(target, false);
+       if (!target->thread.sve_state)
+               return -ENOMEM;
+
        if (!thread_za_enabled(&target->thread)) {
-               sme_alloc(target);
+               sme_alloc(target, true);
                if (!target->thread.sme_state)
                        return -ENOMEM;
        }
@@ -1179,8 +1191,10 @@ static int zt_set(struct task_struct *target,
        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                 thread_zt_state(&target->thread),
                                 0, ZT_SIG_REG_BYTES);
-       if (ret == 0)
+       if (ret == 0) {
                target->thread.svcr |= SVCR_ZA_MASK;
+               set_tsk_thread_flag(target, TIF_SME);
+       }
 
        fpsimd_flush_task_state(target);
 
index e304f7e..c7ebe74 100644 (file)
@@ -475,7 +475,7 @@ static int restore_za_context(struct user_ctxs *user)
        fpsimd_flush_task_state(current);
        /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
 
-       sme_alloc(current);
+       sme_alloc(current, true);
        if (!current->thread.sme_state) {
                current->thread.svcr &= ~SVCR_ZA_MASK;
                clear_thread_flag(TIF_SME);
index f531da6..83c1e09 100644 (file)
@@ -25,7 +25,6 @@ menuconfig KVM
        select MMU_NOTIFIER
        select PREEMPT_NOTIFIERS
        select HAVE_KVM_CPU_RELAX_INTERCEPT
-       select HAVE_KVM_ARCH_TLB_FLUSH_ALL
        select KVM_MMIO
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select KVM_XFER_TO_GUEST_WORK
@@ -43,6 +42,7 @@ menuconfig KVM
        select SCHED_INFO
        select GUEST_PERF_EVENTS if PERF_EVENTS
        select INTERVAL_TREE
+       select XARRAY_MULTI
        help
          Support hosting virtualized guest machines.
 
index 72dc53a..4866b3f 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
 #include <asm/kvm_pkvm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/sections.h>
@@ -55,7 +56,7 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
 
 static bool vgic_present, kvm_arm_initialised;
 
-static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized);
 DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
 bool is_kvm_arm_initialised(void)
@@ -365,7 +366,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 #endif
 
        /* Force users to call KVM_ARM_VCPU_INIT */
-       vcpu->arch.target = -1;
+       vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
        bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
        vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -462,7 +463,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                vcpu_ptrauth_disable(vcpu);
        kvm_arch_vcpu_load_debug_state_flags(vcpu);
 
-       if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
+       if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
                vcpu_set_on_unsupported_cpu(vcpu);
 }
 
@@ -574,7 +575,7 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
 
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
 {
-       return vcpu->arch.target >= 0;
+       return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
 }
 
 /*
@@ -803,6 +804,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
                        kvm_pmu_handle_pmcr(vcpu,
                                            __vcpu_sys_reg(vcpu, PMCR_EL0));
 
+               if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
+                       kvm_vcpu_pmu_restore_guest(vcpu);
+
                if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
                        return kvm_vcpu_suspend(vcpu);
 
@@ -818,6 +822,9 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
        if (likely(!vcpu_mode_is_32bit(vcpu)))
                return false;
 
+       if (vcpu_has_nv(vcpu))
+               return true;
+
        return !kvm_supports_32bit_el0();
 }
 
@@ -1058,7 +1065,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                         * invalid. The VMM can try and fix it by issuing  a
                         * KVM_ARM_VCPU_INIT if it really wants to.
                         */
-                       vcpu->arch.target = -1;
+                       vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
                        ret = ARM_EXCEPTION_IL;
                }
 
@@ -1219,8 +1226,7 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
 {
        unsigned long features = init->features[0];
 
-       return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) ||
-                       vcpu->arch.target != init->target;
+       return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
 }
 
 static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
@@ -1236,20 +1242,18 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
            !bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
                goto out_unlock;
 
-       vcpu->arch.target = init->target;
        bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
 
        /* Now we know what it is, we can reset it. */
        ret = kvm_reset_vcpu(vcpu);
        if (ret) {
-               vcpu->arch.target = -1;
                bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
                goto out_unlock;
        }
 
        bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
        set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
-
+       vcpu_set_flag(vcpu, VCPU_INITIALIZED);
 out_unlock:
        mutex_unlock(&kvm->arch.config_lock);
        return ret;
@@ -1260,14 +1264,15 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 {
        int ret;
 
-       if (init->target != kvm_target_cpu())
+       if (init->target != KVM_ARM_TARGET_GENERIC_V8 &&
+           init->target != kvm_target_cpu())
                return -EINVAL;
 
        ret = kvm_vcpu_init_check_features(vcpu, init);
        if (ret)
                return ret;
 
-       if (vcpu->arch.target == -1)
+       if (!kvm_vcpu_initialized(vcpu))
                return __kvm_vcpu_set_target(vcpu, init);
 
        if (kvm_vcpu_init_changed(vcpu, init))
@@ -1532,12 +1537,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
                                        struct kvm_arm_device_addr *dev_addr)
 {
@@ -1595,9 +1594,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
                return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
        }
        case KVM_ARM_PREFERRED_TARGET: {
-               struct kvm_vcpu_init init;
-
-               kvm_vcpu_preferred_target(&init);
+               struct kvm_vcpu_init init = {
+                       .target = KVM_ARM_TARGET_GENERIC_V8,
+               };
 
                if (copy_to_user(argp, &init, sizeof(init)))
                        return -EFAULT;
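For context, the (unchanged) userspace flow this ioctl serves, sketched with error handling omitted; after this change the returned target is always KVM_ARM_TARGET_GENERIC_V8:

    struct kvm_vcpu_init init = { 0 };

    ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init);  /* fills init.target */
    /* select optional features in init.features[] here */
    ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);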
@@ -1864,18 +1863,24 @@ static void cpu_hyp_reinit(void)
        cpu_hyp_init_features();
 }
 
-static void _kvm_arch_hardware_enable(void *discard)
+static void cpu_hyp_init(void *discard)
 {
-       if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+       if (!__this_cpu_read(kvm_hyp_initialized)) {
                cpu_hyp_reinit();
-               __this_cpu_write(kvm_arm_hardware_enabled, 1);
+               __this_cpu_write(kvm_hyp_initialized, 1);
        }
 }
 
-int kvm_arch_hardware_enable(void)
+static void cpu_hyp_uninit(void *discard)
 {
-       int was_enabled;
+       if (__this_cpu_read(kvm_hyp_initialized)) {
+               cpu_hyp_reset();
+               __this_cpu_write(kvm_hyp_initialized, 0);
+       }
+}
 
+int kvm_arch_hardware_enable(void)
+{
        /*
         * Most calls to this function are made with migration
         * disabled, but not with preemption disabled. The former is
@@ -1884,36 +1889,23 @@ int kvm_arch_hardware_enable(void)
         */
        preempt_disable();
 
-       was_enabled = __this_cpu_read(kvm_arm_hardware_enabled);
-       _kvm_arch_hardware_enable(NULL);
+       cpu_hyp_init(NULL);
 
-       if (!was_enabled) {
-               kvm_vgic_cpu_up();
-               kvm_timer_cpu_up();
-       }
+       kvm_vgic_cpu_up();
+       kvm_timer_cpu_up();
 
        preempt_enable();
 
        return 0;
 }
 
-static void _kvm_arch_hardware_disable(void *discard)
-{
-       if (__this_cpu_read(kvm_arm_hardware_enabled)) {
-               cpu_hyp_reset();
-               __this_cpu_write(kvm_arm_hardware_enabled, 0);
-       }
-}
-
 void kvm_arch_hardware_disable(void)
 {
-       if (__this_cpu_read(kvm_arm_hardware_enabled)) {
-               kvm_timer_cpu_down();
-               kvm_vgic_cpu_down();
-       }
+       kvm_timer_cpu_down();
+       kvm_vgic_cpu_down();
 
        if (!is_protected_kvm_enabled())
-               _kvm_arch_hardware_disable(NULL);
+               cpu_hyp_uninit(NULL);
 }
 
 #ifdef CONFIG_CPU_PM
@@ -1922,16 +1914,16 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
                                    void *v)
 {
        /*
-        * kvm_arm_hardware_enabled is left with its old value over
+        * kvm_hyp_initialized is left with its old value over
         * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
         * re-enable hyp.
         */
        switch (cmd) {
        case CPU_PM_ENTER:
-               if (__this_cpu_read(kvm_arm_hardware_enabled))
+               if (__this_cpu_read(kvm_hyp_initialized))
                        /*
-                        * don't update kvm_arm_hardware_enabled here
-                        * so that the hardware will be re-enabled
+                        * don't update kvm_hyp_initialized here
+                        * so that the hyp will be re-enabled
                         * when we resume. See below.
                         */
                        cpu_hyp_reset();
@@ -1939,8 +1931,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
                return NOTIFY_OK;
        case CPU_PM_ENTER_FAILED:
        case CPU_PM_EXIT:
-               if (__this_cpu_read(kvm_arm_hardware_enabled))
-                       /* The hardware was enabled before suspend. */
+               if (__this_cpu_read(kvm_hyp_initialized))
+                       /* The hyp was enabled before suspend. */
                        cpu_hyp_reinit();
 
                return NOTIFY_OK;
@@ -2021,7 +2013,7 @@ static int __init init_subsystems(void)
        /*
         * Enable hardware so that subsystem initialisation can access EL2.
         */
-       on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
+       on_each_cpu(cpu_hyp_init, NULL, 1);
 
        /*
         * Register CPU lower-power notifier
@@ -2059,7 +2051,7 @@ out:
                hyp_cpu_pm_exit();
 
        if (err || !is_protected_kvm_enabled())
-               on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+               on_each_cpu(cpu_hyp_uninit, NULL, 1);
 
        return err;
 }
@@ -2097,7 +2089,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
         * The stub hypercalls are now disabled, so set our local flag to
         * prevent a later re-init attempt in kvm_arch_hardware_enable().
         */
-       __this_cpu_write(kvm_arm_hardware_enabled, 1);
+       __this_cpu_write(kvm_hyp_initialized, 1);
        preempt_enable();
 
        return ret;
@@ -2283,30 +2275,8 @@ static int __init init_hyp_mode(void)
        for_each_possible_cpu(cpu) {
                struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
                char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
-               unsigned long hyp_addr;
-
-               /*
-                * Allocate a contiguous HYP private VA range for the stack
-                * and guard page. The allocation is also aligned based on
-                * the order of its size.
-                */
-               err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
-               if (err) {
-                       kvm_err("Cannot allocate hyp stack guard page\n");
-                       goto out_err;
-               }
 
-               /*
-                * Since the stack grows downwards, map the stack to the page
-                * at the higher address and leave the lower guard page
-                * unbacked.
-                *
-                * Any valid stack address now has the PAGE_SHIFT bit as 1
-                * and addresses corresponding to the guard page have the
-                * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-                */
-               err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
-                                           __pa(stack_page), PAGE_HYP);
+               err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
                if (err) {
                        kvm_err("Cannot map hyp stack\n");
                        goto out_err;
@@ -2319,8 +2289,6 @@ static int __init init_hyp_mode(void)
                 * has been mapped in the flexible private VA space.
                 */
                params->stack_pa = __pa(stack_page);
-
-               params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
        }
 
        for_each_possible_cpu(cpu) {
index b966620..9ced1bf 100644 (file)
 
 #include "trace.h"
 
+enum trap_behaviour {
+       BEHAVE_HANDLE_LOCALLY   = 0,
+       BEHAVE_FORWARD_READ     = BIT(0),
+       BEHAVE_FORWARD_WRITE    = BIT(1),
+       BEHAVE_FORWARD_ANY      = BEHAVE_FORWARD_READ | BEHAVE_FORWARD_WRITE,
+};
+
+struct trap_bits {
+       const enum vcpu_sysreg          index;
+       const enum trap_behaviour       behaviour;
+       const u64                       value;
+       const u64                       mask;
+};
+
+/* Coarse Grained Trap definitions */
+enum cgt_group_id {
+       /* Indicates no coarse trap control */
+       __RESERVED__,
+
+       /*
+        * The first batch of IDs denote coarse trap controls that are
+        * used on their own, instead of being part of a combination of
+        * trap controls.
+        */
+       CGT_HCR_TID1,
+       CGT_HCR_TID2,
+       CGT_HCR_TID3,
+       CGT_HCR_IMO,
+       CGT_HCR_FMO,
+       CGT_HCR_TIDCP,
+       CGT_HCR_TACR,
+       CGT_HCR_TSW,
+       CGT_HCR_TPC,
+       CGT_HCR_TPU,
+       CGT_HCR_TTLB,
+       CGT_HCR_TVM,
+       CGT_HCR_TDZ,
+       CGT_HCR_TRVM,
+       CGT_HCR_TLOR,
+       CGT_HCR_TERR,
+       CGT_HCR_APK,
+       CGT_HCR_NV,
+       CGT_HCR_NV_nNV2,
+       CGT_HCR_NV1_nNV2,
+       CGT_HCR_AT,
+       CGT_HCR_nFIEN,
+       CGT_HCR_TID4,
+       CGT_HCR_TICAB,
+       CGT_HCR_TOCU,
+       CGT_HCR_ENSCXT,
+       CGT_HCR_TTLBIS,
+       CGT_HCR_TTLBOS,
+
+       CGT_MDCR_TPMCR,
+       CGT_MDCR_TPM,
+       CGT_MDCR_TDE,
+       CGT_MDCR_TDA,
+       CGT_MDCR_TDOSA,
+       CGT_MDCR_TDRA,
+       CGT_MDCR_E2PB,
+       CGT_MDCR_TPMS,
+       CGT_MDCR_TTRF,
+       CGT_MDCR_E2TB,
+       CGT_MDCR_TDCC,
+
+       /*
+        * Anything after this point is a combination of coarse trap
+        * controls, which must all be evaluated to decide what to do.
+        */
+       __MULTIPLE_CONTROL_BITS__,
+       CGT_HCR_IMO_FMO = __MULTIPLE_CONTROL_BITS__,
+       CGT_HCR_TID2_TID4,
+       CGT_HCR_TTLB_TTLBIS,
+       CGT_HCR_TTLB_TTLBOS,
+       CGT_HCR_TVM_TRVM,
+       CGT_HCR_TPU_TICAB,
+       CGT_HCR_TPU_TOCU,
+       CGT_HCR_NV1_nNV2_ENSCXT,
+       CGT_MDCR_TPM_TPMCR,
+       CGT_MDCR_TDE_TDA,
+       CGT_MDCR_TDE_TDOSA,
+       CGT_MDCR_TDE_TDRA,
+       CGT_MDCR_TDCC_TDE_TDA,
+
+       /*
+        * Anything after this point requires a callback evaluating a
+        * complex trap condition. Ugly stuff.
+        */
+       __COMPLEX_CONDITIONS__,
+       CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__,
+       CGT_CNTHCTL_EL1PTEN,
+
+       /* Must be last */
+       __NR_CGT_GROUP_IDS__
+};
+
+static const struct trap_bits coarse_trap_bits[] = {
+       [CGT_HCR_TID1] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID1,
+               .mask           = HCR_TID1,
+               .behaviour      = BEHAVE_FORWARD_READ,
+       },
+       [CGT_HCR_TID2] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID2,
+               .mask           = HCR_TID2,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TID3] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID3,
+               .mask           = HCR_TID3,
+               .behaviour      = BEHAVE_FORWARD_READ,
+       },
+       [CGT_HCR_IMO] = {
+               .index          = HCR_EL2,
+               .value          = HCR_IMO,
+               .mask           = HCR_IMO,
+               .behaviour      = BEHAVE_FORWARD_WRITE,
+       },
+       [CGT_HCR_FMO] = {
+               .index          = HCR_EL2,
+               .value          = HCR_FMO,
+               .mask           = HCR_FMO,
+               .behaviour      = BEHAVE_FORWARD_WRITE,
+       },
+       [CGT_HCR_TIDCP] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TIDCP,
+               .mask           = HCR_TIDCP,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TACR] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TACR,
+               .mask           = HCR_TACR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TSW] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TSW,
+               .mask           = HCR_TSW,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TPC] = { /* Also called TCPC when FEAT_DPB is implemented */
+               .index          = HCR_EL2,
+               .value          = HCR_TPC,
+               .mask           = HCR_TPC,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TPU] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TPU,
+               .mask           = HCR_TPU,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TTLB] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TTLB,
+               .mask           = HCR_TTLB,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TVM] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TVM,
+               .mask           = HCR_TVM,
+               .behaviour      = BEHAVE_FORWARD_WRITE,
+       },
+       [CGT_HCR_TDZ] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TDZ,
+               .mask           = HCR_TDZ,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TRVM] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TRVM,
+               .mask           = HCR_TRVM,
+               .behaviour      = BEHAVE_FORWARD_READ,
+       },
+       [CGT_HCR_TLOR] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TLOR,
+               .mask           = HCR_TLOR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TERR] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TERR,
+               .mask           = HCR_TERR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_APK] = {
+               .index          = HCR_EL2,
+               .value          = 0,
+               .mask           = HCR_APK,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_NV] = {
+               .index          = HCR_EL2,
+               .value          = HCR_NV,
+               .mask           = HCR_NV,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_NV_nNV2] = {
+               .index          = HCR_EL2,
+               .value          = HCR_NV,
+               .mask           = HCR_NV | HCR_NV2,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_NV1_nNV2] = {
+               .index          = HCR_EL2,
+               .value          = HCR_NV | HCR_NV1,
+               .mask           = HCR_NV | HCR_NV1 | HCR_NV2,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_AT] = {
+               .index          = HCR_EL2,
+               .value          = HCR_AT,
+               .mask           = HCR_AT,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_nFIEN] = {
+               .index          = HCR_EL2,
+               .value          = 0,
+               .mask           = HCR_FIEN,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TID4] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID4,
+               .mask           = HCR_TID4,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TICAB] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TICAB,
+               .mask           = HCR_TICAB,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TOCU] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TOCU,
+               .mask           = HCR_TOCU,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_ENSCXT] = {
+               .index          = HCR_EL2,
+               .value          = 0,
+               .mask           = HCR_ENSCXT,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TTLBIS] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TTLBIS,
+               .mask           = HCR_TTLBIS,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TTLBOS] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TTLBOS,
+               .mask           = HCR_TTLBOS,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TPMCR] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TPMCR,
+               .mask           = MDCR_EL2_TPMCR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TPM] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TPM,
+               .mask           = MDCR_EL2_TPM,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDE] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDE,
+               .mask           = MDCR_EL2_TDE,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDA] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDA,
+               .mask           = MDCR_EL2_TDA,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDOSA] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDOSA,
+               .mask           = MDCR_EL2_TDOSA,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDRA] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDRA,
+               .mask           = MDCR_EL2_TDRA,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_E2PB] = {
+               .index          = MDCR_EL2,
+               .value          = 0,
+               .mask           = BIT(MDCR_EL2_E2PB_SHIFT),
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TPMS] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TPMS,
+               .mask           = MDCR_EL2_TPMS,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TTRF] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TTRF,
+               .mask           = MDCR_EL2_TTRF,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_E2TB] = {
+               .index          = MDCR_EL2,
+               .value          = 0,
+               .mask           = BIT(MDCR_EL2_E2TB_SHIFT),
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDCC] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDCC,
+               .mask           = MDCR_EL2_TDCC,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+};
+
+#define MCB(id, ...)                                           \
+       [id - __MULTIPLE_CONTROL_BITS__]        =               \
+               (const enum cgt_group_id[]){                    \
+               __VA_ARGS__, __RESERVED__                       \
+               }
+
+static const enum cgt_group_id *coarse_control_combo[] = {
+       MCB(CGT_HCR_IMO_FMO,            CGT_HCR_IMO, CGT_HCR_FMO),
+       MCB(CGT_HCR_TID2_TID4,          CGT_HCR_TID2, CGT_HCR_TID4),
+       MCB(CGT_HCR_TTLB_TTLBIS,        CGT_HCR_TTLB, CGT_HCR_TTLBIS),
+       MCB(CGT_HCR_TTLB_TTLBOS,        CGT_HCR_TTLB, CGT_HCR_TTLBOS),
+       MCB(CGT_HCR_TVM_TRVM,           CGT_HCR_TVM, CGT_HCR_TRVM),
+       MCB(CGT_HCR_TPU_TICAB,          CGT_HCR_TPU, CGT_HCR_TICAB),
+       MCB(CGT_HCR_TPU_TOCU,           CGT_HCR_TPU, CGT_HCR_TOCU),
+       MCB(CGT_HCR_NV1_nNV2_ENSCXT,    CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT),
+       MCB(CGT_MDCR_TPM_TPMCR,         CGT_MDCR_TPM, CGT_MDCR_TPMCR),
+       MCB(CGT_MDCR_TDE_TDA,           CGT_MDCR_TDE, CGT_MDCR_TDA),
+       MCB(CGT_MDCR_TDE_TDOSA,         CGT_MDCR_TDE, CGT_MDCR_TDOSA),
+       MCB(CGT_MDCR_TDE_TDRA,          CGT_MDCR_TDE, CGT_MDCR_TDRA),
+       MCB(CGT_MDCR_TDCC_TDE_TDA,      CGT_MDCR_TDCC, CGT_MDCR_TDE, CGT_MDCR_TDA),
+};
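+
+/*
+ * For illustration: MCB() builds a __RESERVED__-terminated array of
+ * simple trap controls, so e.g. MCB(CGT_HCR_IMO_FMO, CGT_HCR_IMO,
+ * CGT_HCR_FMO) expands to
+ *
+ *   [CGT_HCR_IMO_FMO - __MULTIPLE_CONTROL_BITS__] =
+ *           (const enum cgt_group_id[]){ CGT_HCR_IMO, CGT_HCR_FMO, __RESERVED__ }
+ *
+ * and a combined ID is presumably resolved by walking that array until
+ * __RESERVED__ and merging the behaviour of each individual control.
+ */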
+
+typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *);
+
+/*
+ * Warning, maximum confusion ahead.
+ *
+ * When E2H=0, CNTHCTL_EL2[1:0] are defined as EL1PCEN:EL1PCTEN
+ * When E2H=1, CNTHCTL_EL2[11:10] are defined as EL1PTEN:EL1PCTEN
+ *
+ * Note the single letter difference? Yet, the bits have the same
+ * function despite a different layout and a different name.
+ *
+ * We don't try to reconcile this mess. We just use the E2H=0 bits
+ * to generate something that is in the E2H=1 format, and live with
+ * it. You're welcome.
+ */
+static u64 get_sanitized_cnthctl(struct kvm_vcpu *vcpu)
+{
+       u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+
+       if (!vcpu_el2_e2h_is_set(vcpu))
+               val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
+
+       return val & ((CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN) << 10);
+}
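+
+/*
+ * For illustration: with E2H=0, EL1PCTEN is CNTHCTL_EL2[0] and EL1PCEN
+ * is CNTHCTL_EL2[1], so the shift by 10 above moves them to bits [10]
+ * and [11] - the positions EL1PCTEN and EL1PTEN occupy when E2H=1 -
+ * and the two checks below only ever have to deal with the E2H=1
+ * layout.
+ */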
+
+static enum trap_behaviour check_cnthctl_el1pcten(struct kvm_vcpu *vcpu)
+{
+       if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCTEN << 10))
+               return BEHAVE_HANDLE_LOCALLY;
+
+       return BEHAVE_FORWARD_ANY;
+}
+
+static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu)
+{
+       if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCEN << 10))
+               return BEHAVE_HANDLE_LOCALLY;
+
+       return BEHAVE_FORWARD_ANY;
+}
+
+#define CCC(id, fn)                            \
+       [id - __COMPLEX_CONDITIONS__] = fn
+
+static const complex_condition_check ccc[] = {
+       CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten),
+       CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten),
+};
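+
+/*
+ * For illustration: the three ID ranges are presumably resolved as
+ * follows - IDs below __MULTIPLE_CONTROL_BITS__ map 1:1 onto
+ * coarse_trap_bits[], IDs from there up to __COMPLEX_CONDITIONS__
+ * index coarse_control_combo[] (offset by __MULTIPLE_CONTROL_BITS__),
+ * and the remaining IDs index ccc[] (offset by __COMPLEX_CONDITIONS__)
+ * and evaluate the trap behaviour at runtime against the vcpu state.
+ */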
+
+/*
+ * Bit assignment for the trap controls. We use a 64-bit word with the
+ * following layout for each trapped sysreg:
+ *
+ * [9:0]       enum cgt_group_id (10 bits)
+ * [13:10]     enum fgt_group_id (4 bits)
+ * [19:14]     bit number in the FGT register (6 bits)
+ * [20]                trap polarity (1 bit)
+ * [25:21]     FG filter (5 bits)
+ * [62:26]     Unused (37 bits)
+ * [63]                RES0 - Must be zero, as lost on insertion in the xarray
+ */
+#define TC_CGT_BITS    10
+#define TC_FGT_BITS    4
+#define TC_FGF_BITS    5
+
+union trap_config {
+       u64     val;
+       struct {
+               unsigned long   cgt:TC_CGT_BITS; /* Coarse Grained Trap id */
+               unsigned long   fgt:TC_FGT_BITS; /* Fine Grained Trap id */
+               unsigned long   bit:6;           /* Bit number */
+               unsigned long   pol:1;           /* Polarity */
+               unsigned long   fgf:TC_FGF_BITS; /* Fine Grained Filter */
+               unsigned long   unused:37;       /* Unused, should be zero */
+               unsigned long   mbz:1;           /* Must Be Zero */
+       };
+};
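+
+/*
+ * For illustration: the bitfield and the u64 alias each other, so a
+ * coarse-grained-only entry such as (union trap_config){ .cgt =
+ * CGT_HCR_TTLB } only populates bits [9:0] of .val, matching the
+ * layout documented above; .val is presumably what gets stored in
+ * (and later looked up from) the xarray, keyed by the sysreg encoding.
+ */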
+
+struct encoding_to_trap_config {
+       const u32                       encoding;
+       const u32                       end;
+       const union trap_config         tc;
+       const unsigned int              line;
+};
+
+#define SR_RANGE_TRAP(sr_start, sr_end, trap_id)                       \
+       {                                                               \
+               .encoding       = sr_start,                             \
+               .end            = sr_end,                               \
+               .tc             = {                                     \
+                       .cgt            = trap_id,                      \
+               },                                                      \
+               .line = __LINE__,                                       \
+       }
+
+#define SR_TRAP(sr, trap_id)           SR_RANGE_TRAP(sr, sr, trap_id)
+
+/*
+ * Map encoding to trap bits for exceptions reported with EC=0x18.
+ * These must only be evaluated when running a nested hypervisor
+ * while the current context is not a hypervisor context. When the
+ * trapped access matches one of the trap controls, the exception is
+ * re-injected in the nested hypervisor.
+ */
+static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
+       SR_TRAP(SYS_REVIDR_EL1,         CGT_HCR_TID1),
+       SR_TRAP(SYS_AIDR_EL1,           CGT_HCR_TID1),
+       SR_TRAP(SYS_SMIDR_EL1,          CGT_HCR_TID1),
+       SR_TRAP(SYS_CTR_EL0,            CGT_HCR_TID2),
+       SR_TRAP(SYS_CCSIDR_EL1,         CGT_HCR_TID2_TID4),
+       SR_TRAP(SYS_CCSIDR2_EL1,        CGT_HCR_TID2_TID4),
+       SR_TRAP(SYS_CLIDR_EL1,          CGT_HCR_TID2_TID4),
+       SR_TRAP(SYS_CSSELR_EL1,         CGT_HCR_TID2_TID4),
+       SR_RANGE_TRAP(SYS_ID_PFR0_EL1,
+                     sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3),
+       SR_TRAP(SYS_ICC_SGI0R_EL1,      CGT_HCR_IMO_FMO),
+       SR_TRAP(SYS_ICC_ASGI1R_EL1,     CGT_HCR_IMO_FMO),
+       SR_TRAP(SYS_ICC_SGI1R_EL1,      CGT_HCR_IMO_FMO),
+       SR_RANGE_TRAP(sys_reg(3, 0, 11, 0, 0),
+                     sys_reg(3, 0, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 1, 11, 0, 0),
+                     sys_reg(3, 1, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 2, 11, 0, 0),
+                     sys_reg(3, 2, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 3, 11, 0, 0),
+                     sys_reg(3, 3, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 4, 11, 0, 0),
+                     sys_reg(3, 4, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 5, 11, 0, 0),
+                     sys_reg(3, 5, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 6, 11, 0, 0),
+                     sys_reg(3, 6, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 7, 11, 0, 0),
+                     sys_reg(3, 7, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 0, 15, 0, 0),
+                     sys_reg(3, 0, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 1, 15, 0, 0),
+                     sys_reg(3, 1, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 2, 15, 0, 0),
+                     sys_reg(3, 2, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 3, 15, 0, 0),
+                     sys_reg(3, 3, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 4, 15, 0, 0),
+                     sys_reg(3, 4, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 5, 15, 0, 0),
+                     sys_reg(3, 5, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 6, 15, 0, 0),
+                     sys_reg(3, 6, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 7, 15, 0, 0),
+                     sys_reg(3, 7, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_TRAP(SYS_ACTLR_EL1,          CGT_HCR_TACR),
+       SR_TRAP(SYS_DC_ISW,             CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CSW,             CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CISW,            CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_IGSW,            CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_IGDSW,           CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CGSW,            CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CGDSW,           CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CIGSW,           CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CIGDSW,          CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CIVAC,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CVAC,            CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CVAP,            CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CVADP,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_IVAC,            CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CIGVAC,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CIGDVAC,         CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_IGVAC,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_IGDVAC,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGVAC,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGDVAC,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGVAP,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGDVAP,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGVADP,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGDVADP,         CGT_HCR_TPC),
+       SR_TRAP(SYS_IC_IVAU,            CGT_HCR_TPU_TOCU),
+       SR_TRAP(SYS_IC_IALLU,           CGT_HCR_TPU_TOCU),
+       SR_TRAP(SYS_IC_IALLUIS,         CGT_HCR_TPU_TICAB),
+       SR_TRAP(SYS_DC_CVAU,            CGT_HCR_TPU_TOCU),
+       SR_TRAP(OP_TLBI_RVAE1,          CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAAE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVALE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAALE1,        CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VMALLE1,        CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAE1,           CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_ASIDE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAAE1,          CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VALE1,          CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAALE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAE1NXS,       CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAAE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVALE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAALE1NXS,     CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VMALLE1NXS,     CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAE1NXS,        CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_ASIDE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAAE1NXS,       CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VALE1NXS,       CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAALE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAE1IS,        CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAAE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVALE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAALE1IS,      CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VMALLE1IS,      CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAE1IS,         CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_ASIDE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAAE1IS,        CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VALE1IS,        CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAALE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAE1ISNXS,     CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAAE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVALE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAALE1ISNXS,   CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VMALLE1ISNXS,   CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAE1ISNXS,      CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_ASIDE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAAE1ISNXS,     CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VALE1ISNXS,     CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAALE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VMALLE1OS,      CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAE1OS,         CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_ASIDE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAAE1OS,        CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VALE1OS,        CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAALE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAE1OS,        CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAAE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVALE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAALE1OS,      CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VMALLE1OSNXS,   CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAE1OSNXS,      CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_ASIDE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAAE1OSNXS,     CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VALE1OSNXS,     CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAALE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAE1OSNXS,     CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAAE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVALE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAALE1OSNXS,   CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(SYS_SCTLR_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_TTBR0_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_TTBR1_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_TCR_EL1,            CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_ESR_EL1,            CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_FAR_EL1,            CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_AFSR0_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_AFSR1_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_MAIR_EL1,           CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_AMAIR_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_CONTEXTIDR_EL1,     CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_DC_ZVA,             CGT_HCR_TDZ),
+       SR_TRAP(SYS_DC_GVA,             CGT_HCR_TDZ),
+       SR_TRAP(SYS_DC_GZVA,            CGT_HCR_TDZ),
+       SR_TRAP(SYS_LORSA_EL1,          CGT_HCR_TLOR),
+       SR_TRAP(SYS_LOREA_EL1,          CGT_HCR_TLOR),
+       SR_TRAP(SYS_LORN_EL1,           CGT_HCR_TLOR),
+       SR_TRAP(SYS_LORC_EL1,           CGT_HCR_TLOR),
+       SR_TRAP(SYS_LORID_EL1,          CGT_HCR_TLOR),
+       SR_TRAP(SYS_ERRIDR_EL1,         CGT_HCR_TERR),
+       SR_TRAP(SYS_ERRSELR_EL1,        CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXADDR_EL1,        CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXCTLR_EL1,        CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXFR_EL1,          CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC0_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC1_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC2_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC3_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXSTATUS_EL1,      CGT_HCR_TERR),
+       SR_TRAP(SYS_APIAKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APIAKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APIBKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APIBKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDAKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDAKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDBKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDBKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APGAKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APGAKEYHI_EL1,      CGT_HCR_APK),
+       /* All _EL2 registers */
+       SR_RANGE_TRAP(sys_reg(3, 4, 0, 0, 0),
+                     sys_reg(3, 4, 3, 15, 7), CGT_HCR_NV),
+       /* Skip the SP_EL1 encoding... */
+       SR_TRAP(SYS_SPSR_EL2,           CGT_HCR_NV),
+       SR_TRAP(SYS_ELR_EL2,            CGT_HCR_NV),
+       SR_RANGE_TRAP(sys_reg(3, 4, 4, 1, 1),
+                     sys_reg(3, 4, 10, 15, 7), CGT_HCR_NV),
+       SR_RANGE_TRAP(sys_reg(3, 4, 12, 0, 0),
+                     sys_reg(3, 4, 14, 15, 7), CGT_HCR_NV),
+       /* All _EL02, _EL12 registers */
+       SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0),
+                     sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV),
+       SR_RANGE_TRAP(sys_reg(3, 5, 12, 0, 0),
+                     sys_reg(3, 5, 14, 15, 7), CGT_HCR_NV),
+       SR_TRAP(OP_AT_S1E2R,            CGT_HCR_NV),
+       SR_TRAP(OP_AT_S1E2W,            CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E1R,           CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E1W,           CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E0R,           CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E0W,           CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2,         CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2,           CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1NXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1NXS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1NXS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1NXS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2NXS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2NXS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1NXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1IS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1IS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1IS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1IS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2IS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2IS,         CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1IS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1ISNXS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1ISNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1ISNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1ISNXS, CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2ISNXS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2ISNXS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1ISNXS,CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2OS,         CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1OS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1OS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1OS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1OS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1OS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2OS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2OSNXS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1OSNXS,CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1OSNXS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1OSNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1OSNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1OSNXS, CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2OSNXS,    CGT_HCR_NV),
+       SR_TRAP(OP_CPP_RCTX,            CGT_HCR_NV),
+       SR_TRAP(OP_DVP_RCTX,            CGT_HCR_NV),
+       SR_TRAP(OP_CFP_RCTX,            CGT_HCR_NV),
+       SR_TRAP(SYS_SP_EL1,             CGT_HCR_NV_nNV2),
+       SR_TRAP(SYS_VBAR_EL1,           CGT_HCR_NV1_nNV2),
+       SR_TRAP(SYS_ELR_EL1,            CGT_HCR_NV1_nNV2),
+       SR_TRAP(SYS_SPSR_EL1,           CGT_HCR_NV1_nNV2),
+       SR_TRAP(SYS_SCXTNUM_EL1,        CGT_HCR_NV1_nNV2_ENSCXT),
+       SR_TRAP(SYS_SCXTNUM_EL0,        CGT_HCR_ENSCXT),
+       SR_TRAP(OP_AT_S1E1R,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E1W,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E0R,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E0W,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E1RP,           CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E1WP,           CGT_HCR_AT),
+       SR_TRAP(SYS_ERXPFGF_EL1,        CGT_HCR_nFIEN),
+       SR_TRAP(SYS_ERXPFGCTL_EL1,      CGT_HCR_nFIEN),
+       SR_TRAP(SYS_ERXPFGCDN_EL1,      CGT_HCR_nFIEN),
+       SR_TRAP(SYS_PMCR_EL0,           CGT_MDCR_TPM_TPMCR),
+       SR_TRAP(SYS_PMCNTENSET_EL0,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCNTENCLR_EL0,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMOVSSET_EL0,       CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMOVSCLR_EL0,       CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCEID0_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCEID1_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMXEVTYPER_EL0,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMSWINC_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMSELR_EL0,         CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMXEVCNTR_EL0,      CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCCNTR_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMUSERENR_EL0,      CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMINTENSET_EL1,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMINTENCLR_EL1,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMMIR_EL1,          CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(0),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(1),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(2),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(3),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(4),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(5),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(6),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(7),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(8),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(9),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(10),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(11),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(12),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(13),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(14),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(15),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(16),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(17),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(18),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(19),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(20),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(21),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(22),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(23),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(24),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(25),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(26),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(27),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(28),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(29),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(30),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(0),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(1),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(2),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(3),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(4),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(5),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(6),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(7),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(8),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(9),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(10), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(11), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(12), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(13), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(14), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(15), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(16), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(17), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(18), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(19), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(20), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(21), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(22), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(23), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(24), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(25), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(26), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(27), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(28), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(29), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(30), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCCFILTR_EL0,      CGT_MDCR_TPM),
+       SR_TRAP(SYS_MDCCSR_EL0,         CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_MDCCINT_EL1,        CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_OSDTRRX_EL1,        CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_OSDTRTX_EL1,        CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_DBGDTR_EL0,         CGT_MDCR_TDCC_TDE_TDA),
+       /*
+        * Also covers DBGDTRRX_EL0, which has the same encoding as
+        * SYS_DBGDTRTX_EL0...
+        */
+       SR_TRAP(SYS_DBGDTRTX_EL0,       CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_MDSCR_EL1,          CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_OSECCR_EL1,         CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(15),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(15),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(15),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGCLAIMSET_EL1,    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGCLAIMCLR_EL1,    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGAUTHSTATUS_EL1,  CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_OSLAR_EL1,          CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_OSLSR_EL1,          CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_OSDLR_EL1,          CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_DBGPRCR_EL1,        CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_MDRAR_EL1,          CGT_MDCR_TDE_TDRA),
+       SR_TRAP(SYS_PMBLIMITR_EL1,      CGT_MDCR_E2PB),
+       SR_TRAP(SYS_PMBPTR_EL1,         CGT_MDCR_E2PB),
+       SR_TRAP(SYS_PMBSR_EL1,          CGT_MDCR_E2PB),
+       SR_TRAP(SYS_PMSCR_EL1,          CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSEVFR_EL1,        CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSFCR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSICR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSIDR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSIRR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSLATFR_EL1,       CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSNEVFR_EL1,       CGT_MDCR_TPMS),
+       SR_TRAP(SYS_TRFCR_EL1,          CGT_MDCR_TTRF),
+       SR_TRAP(SYS_TRBBASER_EL1,       CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBLIMITR_EL1,      CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBMAR_EL1,         CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBPTR_EL1,         CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBSR_EL1,          CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBTRG_EL1,         CGT_MDCR_E2TB),
+       SR_TRAP(SYS_CNTP_TVAL_EL0,      CGT_CNTHCTL_EL1PTEN),
+       SR_TRAP(SYS_CNTP_CVAL_EL0,      CGT_CNTHCTL_EL1PTEN),
+       SR_TRAP(SYS_CNTP_CTL_EL0,       CGT_CNTHCTL_EL1PTEN),
+       SR_TRAP(SYS_CNTPCT_EL0,         CGT_CNTHCTL_EL1PCTEN),
+       SR_TRAP(SYS_CNTPCTSS_EL0,       CGT_CNTHCTL_EL1PCTEN),
+};
+
+static DEFINE_XARRAY(sr_forward_xa);
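+
+/*
+ * For illustration: sr_forward_xa is presumably populated at init time
+ * from the __initconst encoding_to_cgt[] and encoding_to_fgt[] tables
+ * above/below, keyed by sysreg encoding, so that a trapped access can
+ * be turned back into a trap_config with a single xarray lookup.
+ */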
+
+enum fgt_group_id {
+       __NO_FGT_GROUP__,
+       HFGxTR_GROUP,
+       HDFGRTR_GROUP,
+       HDFGWTR_GROUP,
+       HFGITR_GROUP,
+
+       /* Must be last */
+       __NR_FGT_GROUP_IDS__
+};
+
+enum fg_filter_id {
+       __NO_FGF__,
+       HCRX_FGTnXS,
+
+       /* Must be last */
+       __NR_FG_FILTER_IDS__
+};
+
+#define SR_FGF(sr, g, b, p, f)                                 \
+       {                                                       \
+               .encoding       = sr,                           \
+               .end            = sr,                           \
+               .tc             = {                             \
+                       .fgt = g ## _GROUP,                     \
+                       .bit = g ## _EL2_ ## b ## _SHIFT,       \
+                       .pol = p,                               \
+                       .fgf = f,                               \
+               },                                              \
+               .line = __LINE__,                               \
+       }
+
+#define SR_FGT(sr, g, b, p)    SR_FGF(sr, g, b, p, __NO_FGF__)
+
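+/*
+ * For illustration: SR_FGT() fills in the fine-grained fields of the
+ * trap_config, so e.g. SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0)
+ * token-pastes into .fgt = HFGxTR_GROUP, .bit =
+ * HFGxTR_EL2_nTPIDR2_EL0_SHIFT and .pol = 0, presumably a
+ * negative-polarity ("n") control shared by the HFGRTR_EL2/HFGWTR_EL2
+ * pair.
+ */
+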
+static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
+       /* HFGRTR_EL2, HFGWTR_EL2 */
+       SR_FGT(SYS_TPIDR2_EL0,          HFGxTR, nTPIDR2_EL0, 0),
+       SR_FGT(SYS_SMPRI_EL1,           HFGxTR, nSMPRI_EL1, 0),
+       SR_FGT(SYS_ACCDATA_EL1,         HFGxTR, nACCDATA_EL1, 0),
+       SR_FGT(SYS_ERXADDR_EL1,         HFGxTR, ERXADDR_EL1, 1),
+       SR_FGT(SYS_ERXPFGCDN_EL1,       HFGxTR, ERXPFGCDN_EL1, 1),
+       SR_FGT(SYS_ERXPFGCTL_EL1,       HFGxTR, ERXPFGCTL_EL1, 1),
+       SR_FGT(SYS_ERXPFGF_EL1,         HFGxTR, ERXPFGF_EL1, 1),
+       SR_FGT(SYS_ERXMISC0_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXMISC1_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXMISC2_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXMISC3_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXSTATUS_EL1,       HFGxTR, ERXSTATUS_EL1, 1),
+       SR_FGT(SYS_ERXCTLR_EL1,         HFGxTR, ERXCTLR_EL1, 1),
+       SR_FGT(SYS_ERXFR_EL1,           HFGxTR, ERXFR_EL1, 1),
+       SR_FGT(SYS_ERRSELR_EL1,         HFGxTR, ERRSELR_EL1, 1),
+       SR_FGT(SYS_ERRIDR_EL1,          HFGxTR, ERRIDR_EL1, 1),
+       SR_FGT(SYS_ICC_IGRPEN0_EL1,     HFGxTR, ICC_IGRPENn_EL1, 1),
+       SR_FGT(SYS_ICC_IGRPEN1_EL1,     HFGxTR, ICC_IGRPENn_EL1, 1),
+       SR_FGT(SYS_VBAR_EL1,            HFGxTR, VBAR_EL1, 1),
+       SR_FGT(SYS_TTBR1_EL1,           HFGxTR, TTBR1_EL1, 1),
+       SR_FGT(SYS_TTBR0_EL1,           HFGxTR, TTBR0_EL1, 1),
+       SR_FGT(SYS_TPIDR_EL0,           HFGxTR, TPIDR_EL0, 1),
+       SR_FGT(SYS_TPIDRRO_EL0,         HFGxTR, TPIDRRO_EL0, 1),
+       SR_FGT(SYS_TPIDR_EL1,           HFGxTR, TPIDR_EL1, 1),
+       SR_FGT(SYS_TCR_EL1,             HFGxTR, TCR_EL1, 1),
+       SR_FGT(SYS_SCXTNUM_EL0,         HFGxTR, SCXTNUM_EL0, 1),
+       SR_FGT(SYS_SCXTNUM_EL1,         HFGxTR, SCXTNUM_EL1, 1),
+       SR_FGT(SYS_SCTLR_EL1,           HFGxTR, SCTLR_EL1, 1),
+       SR_FGT(SYS_REVIDR_EL1,          HFGxTR, REVIDR_EL1, 1),
+       SR_FGT(SYS_PAR_EL1,             HFGxTR, PAR_EL1, 1),
+       SR_FGT(SYS_MPIDR_EL1,           HFGxTR, MPIDR_EL1, 1),
+       SR_FGT(SYS_MIDR_EL1,            HFGxTR, MIDR_EL1, 1),
+       SR_FGT(SYS_MAIR_EL1,            HFGxTR, MAIR_EL1, 1),
+       SR_FGT(SYS_LORSA_EL1,           HFGxTR, LORSA_EL1, 1),
+       SR_FGT(SYS_LORN_EL1,            HFGxTR, LORN_EL1, 1),
+       SR_FGT(SYS_LORID_EL1,           HFGxTR, LORID_EL1, 1),
+       SR_FGT(SYS_LOREA_EL1,           HFGxTR, LOREA_EL1, 1),
+       SR_FGT(SYS_LORC_EL1,            HFGxTR, LORC_EL1, 1),
+       SR_FGT(SYS_ISR_EL1,             HFGxTR, ISR_EL1, 1),
+       SR_FGT(SYS_FAR_EL1,             HFGxTR, FAR_EL1, 1),
+       SR_FGT(SYS_ESR_EL1,             HFGxTR, ESR_EL1, 1),
+       SR_FGT(SYS_DCZID_EL0,           HFGxTR, DCZID_EL0, 1),
+       SR_FGT(SYS_CTR_EL0,             HFGxTR, CTR_EL0, 1),
+       SR_FGT(SYS_CSSELR_EL1,          HFGxTR, CSSELR_EL1, 1),
+       SR_FGT(SYS_CPACR_EL1,           HFGxTR, CPACR_EL1, 1),
+       SR_FGT(SYS_CONTEXTIDR_EL1,      HFGxTR, CONTEXTIDR_EL1, 1),
+       SR_FGT(SYS_CLIDR_EL1,           HFGxTR, CLIDR_EL1, 1),
+       SR_FGT(SYS_CCSIDR_EL1,          HFGxTR, CCSIDR_EL1, 1),
+       SR_FGT(SYS_APIBKEYLO_EL1,       HFGxTR, APIBKey, 1),
+       SR_FGT(SYS_APIBKEYHI_EL1,       HFGxTR, APIBKey, 1),
+       SR_FGT(SYS_APIAKEYLO_EL1,       HFGxTR, APIAKey, 1),
+       SR_FGT(SYS_APIAKEYHI_EL1,       HFGxTR, APIAKey, 1),
+       SR_FGT(SYS_APGAKEYLO_EL1,       HFGxTR, APGAKey, 1),
+       SR_FGT(SYS_APGAKEYHI_EL1,       HFGxTR, APGAKey, 1),
+       SR_FGT(SYS_APDBKEYLO_EL1,       HFGxTR, APDBKey, 1),
+       SR_FGT(SYS_APDBKEYHI_EL1,       HFGxTR, APDBKey, 1),
+       SR_FGT(SYS_APDAKEYLO_EL1,       HFGxTR, APDAKey, 1),
+       SR_FGT(SYS_APDAKEYHI_EL1,       HFGxTR, APDAKey, 1),
+       SR_FGT(SYS_AMAIR_EL1,           HFGxTR, AMAIR_EL1, 1),
+       SR_FGT(SYS_AIDR_EL1,            HFGxTR, AIDR_EL1, 1),
+       SR_FGT(SYS_AFSR1_EL1,           HFGxTR, AFSR1_EL1, 1),
+       SR_FGT(SYS_AFSR0_EL1,           HFGxTR, AFSR0_EL1, 1),
+       /* HFGITR_EL2 */
+       SR_FGT(OP_BRB_IALL,             HFGITR, nBRBIALL, 0),
+       SR_FGT(OP_BRB_INJ,              HFGITR, nBRBINJ, 0),
+       SR_FGT(SYS_DC_CVAC,             HFGITR, DCCVAC, 1),
+       SR_FGT(SYS_DC_CGVAC,            HFGITR, DCCVAC, 1),
+       SR_FGT(SYS_DC_CGDVAC,           HFGITR, DCCVAC, 1),
+       SR_FGT(OP_CPP_RCTX,             HFGITR, CPPRCTX, 1),
+       SR_FGT(OP_DVP_RCTX,             HFGITR, DVPRCTX, 1),
+       SR_FGT(OP_CFP_RCTX,             HFGITR, CFPRCTX, 1),
+       SR_FGT(OP_TLBI_VAALE1,          HFGITR, TLBIVAALE1, 1),
+       SR_FGT(OP_TLBI_VALE1,           HFGITR, TLBIVALE1, 1),
+       SR_FGT(OP_TLBI_VAAE1,           HFGITR, TLBIVAAE1, 1),
+       SR_FGT(OP_TLBI_ASIDE1,          HFGITR, TLBIASIDE1, 1),
+       SR_FGT(OP_TLBI_VAE1,            HFGITR, TLBIVAE1, 1),
+       SR_FGT(OP_TLBI_VMALLE1,         HFGITR, TLBIVMALLE1, 1),
+       SR_FGT(OP_TLBI_RVAALE1,         HFGITR, TLBIRVAALE1, 1),
+       SR_FGT(OP_TLBI_RVALE1,          HFGITR, TLBIRVALE1, 1),
+       SR_FGT(OP_TLBI_RVAAE1,          HFGITR, TLBIRVAAE1, 1),
+       SR_FGT(OP_TLBI_RVAE1,           HFGITR, TLBIRVAE1, 1),
+       SR_FGT(OP_TLBI_RVAALE1IS,       HFGITR, TLBIRVAALE1IS, 1),
+       SR_FGT(OP_TLBI_RVALE1IS,        HFGITR, TLBIRVALE1IS, 1),
+       SR_FGT(OP_TLBI_RVAAE1IS,        HFGITR, TLBIRVAAE1IS, 1),
+       SR_FGT(OP_TLBI_RVAE1IS,         HFGITR, TLBIRVAE1IS, 1),
+       SR_FGT(OP_TLBI_VAALE1IS,        HFGITR, TLBIVAALE1IS, 1),
+       SR_FGT(OP_TLBI_VALE1IS,         HFGITR, TLBIVALE1IS, 1),
+       SR_FGT(OP_TLBI_VAAE1IS,         HFGITR, TLBIVAAE1IS, 1),
+       SR_FGT(OP_TLBI_ASIDE1IS,        HFGITR, TLBIASIDE1IS, 1),
+       SR_FGT(OP_TLBI_VAE1IS,          HFGITR, TLBIVAE1IS, 1),
+       SR_FGT(OP_TLBI_VMALLE1IS,       HFGITR, TLBIVMALLE1IS, 1),
+       SR_FGT(OP_TLBI_RVAALE1OS,       HFGITR, TLBIRVAALE1OS, 1),
+       SR_FGT(OP_TLBI_RVALE1OS,        HFGITR, TLBIRVALE1OS, 1),
+       SR_FGT(OP_TLBI_RVAAE1OS,        HFGITR, TLBIRVAAE1OS, 1),
+       SR_FGT(OP_TLBI_RVAE1OS,         HFGITR, TLBIRVAE1OS, 1),
+       SR_FGT(OP_TLBI_VAALE1OS,        HFGITR, TLBIVAALE1OS, 1),
+       SR_FGT(OP_TLBI_VALE1OS,         HFGITR, TLBIVALE1OS, 1),
+       SR_FGT(OP_TLBI_VAAE1OS,         HFGITR, TLBIVAAE1OS, 1),
+       SR_FGT(OP_TLBI_ASIDE1OS,        HFGITR, TLBIASIDE1OS, 1),
+       SR_FGT(OP_TLBI_VAE1OS,          HFGITR, TLBIVAE1OS, 1),
+       SR_FGT(OP_TLBI_VMALLE1OS,       HFGITR, TLBIVMALLE1OS, 1),
+       /* nXS variants must be checked against HCRX_EL2.FGTnXS */
+       SR_FGF(OP_TLBI_VAALE1NXS,       HFGITR, TLBIVAALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VALE1NXS,        HFGITR, TLBIVALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAAE1NXS,        HFGITR, TLBIVAAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_ASIDE1NXS,       HFGITR, TLBIASIDE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAE1NXS,         HFGITR, TLBIVAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VMALLE1NXS,      HFGITR, TLBIVMALLE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAALE1NXS,      HFGITR, TLBIRVAALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVALE1NXS,       HFGITR, TLBIRVALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAAE1NXS,       HFGITR, TLBIRVAAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAE1NXS,        HFGITR, TLBIRVAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAALE1ISNXS,    HFGITR, TLBIRVAALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVALE1ISNXS,     HFGITR, TLBIRVALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAAE1ISNXS,     HFGITR, TLBIRVAAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAE1ISNXS,      HFGITR, TLBIRVAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAALE1ISNXS,     HFGITR, TLBIVAALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VALE1ISNXS,      HFGITR, TLBIVALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAAE1ISNXS,      HFGITR, TLBIVAAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_ASIDE1ISNXS,     HFGITR, TLBIASIDE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAE1ISNXS,       HFGITR, TLBIVAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VMALLE1ISNXS,    HFGITR, TLBIVMALLE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAALE1OSNXS,    HFGITR, TLBIRVAALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVALE1OSNXS,     HFGITR, TLBIRVALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAAE1OSNXS,     HFGITR, TLBIRVAAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAE1OSNXS,      HFGITR, TLBIRVAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAALE1OSNXS,     HFGITR, TLBIVAALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VALE1OSNXS,      HFGITR, TLBIVALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAAE1OSNXS,      HFGITR, TLBIVAAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_ASIDE1OSNXS,     HFGITR, TLBIASIDE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAE1OSNXS,       HFGITR, TLBIVAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VMALLE1OSNXS,    HFGITR, TLBIVMALLE1OS, 1, HCRX_FGTnXS),
+       SR_FGT(OP_AT_S1E1WP,            HFGITR, ATS1E1WP, 1),
+       SR_FGT(OP_AT_S1E1RP,            HFGITR, ATS1E1RP, 1),
+       SR_FGT(OP_AT_S1E0W,             HFGITR, ATS1E0W, 1),
+       SR_FGT(OP_AT_S1E0R,             HFGITR, ATS1E0R, 1),
+       SR_FGT(OP_AT_S1E1W,             HFGITR, ATS1E1W, 1),
+       SR_FGT(OP_AT_S1E1R,             HFGITR, ATS1E1R, 1),
+       SR_FGT(SYS_DC_ZVA,              HFGITR, DCZVA, 1),
+       SR_FGT(SYS_DC_GVA,              HFGITR, DCZVA, 1),
+       SR_FGT(SYS_DC_GZVA,             HFGITR, DCZVA, 1),
+       SR_FGT(SYS_DC_CIVAC,            HFGITR, DCCIVAC, 1),
+       SR_FGT(SYS_DC_CIGVAC,           HFGITR, DCCIVAC, 1),
+       SR_FGT(SYS_DC_CIGDVAC,          HFGITR, DCCIVAC, 1),
+       SR_FGT(SYS_DC_CVADP,            HFGITR, DCCVADP, 1),
+       SR_FGT(SYS_DC_CGVADP,           HFGITR, DCCVADP, 1),
+       SR_FGT(SYS_DC_CGDVADP,          HFGITR, DCCVADP, 1),
+       SR_FGT(SYS_DC_CVAP,             HFGITR, DCCVAP, 1),
+       SR_FGT(SYS_DC_CGVAP,            HFGITR, DCCVAP, 1),
+       SR_FGT(SYS_DC_CGDVAP,           HFGITR, DCCVAP, 1),
+       SR_FGT(SYS_DC_CVAU,             HFGITR, DCCVAU, 1),
+       SR_FGT(SYS_DC_CISW,             HFGITR, DCCISW, 1),
+       SR_FGT(SYS_DC_CIGSW,            HFGITR, DCCISW, 1),
+       SR_FGT(SYS_DC_CIGDSW,           HFGITR, DCCISW, 1),
+       SR_FGT(SYS_DC_CSW,              HFGITR, DCCSW, 1),
+       SR_FGT(SYS_DC_CGSW,             HFGITR, DCCSW, 1),
+       SR_FGT(SYS_DC_CGDSW,            HFGITR, DCCSW, 1),
+       SR_FGT(SYS_DC_ISW,              HFGITR, DCISW, 1),
+       SR_FGT(SYS_DC_IGSW,             HFGITR, DCISW, 1),
+       SR_FGT(SYS_DC_IGDSW,            HFGITR, DCISW, 1),
+       SR_FGT(SYS_DC_IVAC,             HFGITR, DCIVAC, 1),
+       SR_FGT(SYS_DC_IGVAC,            HFGITR, DCIVAC, 1),
+       SR_FGT(SYS_DC_IGDVAC,           HFGITR, DCIVAC, 1),
+       SR_FGT(SYS_IC_IVAU,             HFGITR, ICIVAU, 1),
+       SR_FGT(SYS_IC_IALLU,            HFGITR, ICIALLU, 1),
+       SR_FGT(SYS_IC_IALLUIS,          HFGITR, ICIALLUIS, 1),
+       /* HDFGRTR_EL2 */
+       SR_FGT(SYS_PMBIDR_EL1,          HDFGRTR, PMBIDR_EL1, 1),
+       SR_FGT(SYS_PMSNEVFR_EL1,        HDFGRTR, nPMSNEVFR_EL1, 0),
+       SR_FGT(SYS_BRBINF_EL1(0),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(1),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(2),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(3),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(4),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(5),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(6),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(7),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(8),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(9),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(10),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(11),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(12),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(13),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(14),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(15),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(16),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(17),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(18),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(19),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(20),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(21),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(22),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(23),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(24),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(25),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(26),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(27),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(28),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(29),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(30),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(31),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINFINJ_EL1,       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(0),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(1),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(2),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(3),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(4),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(5),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(6),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(7),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(8),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(9),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(10),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(11),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(12),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(13),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(14),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(15),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(16),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(17),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(18),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(19),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(20),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(21),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(22),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(23),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(24),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(25),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(26),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(27),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(28),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(29),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(30),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(31),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRCINJ_EL1,       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(0),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(1),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(2),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(3),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(4),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(5),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(6),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(7),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(8),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(9),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(10),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(11),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(12),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(13),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(14),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(15),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(16),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(17),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(18),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(19),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(20),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(21),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(22),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(23),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(24),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(25),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(26),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(27),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(28),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(29),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(30),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(31),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGTINJ_EL1,       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTS_EL1,           HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBCR_EL1,           HDFGRTR, nBRBCTL, 0),
+       SR_FGT(SYS_BRBFCR_EL1,          HDFGRTR, nBRBCTL, 0),
+       SR_FGT(SYS_BRBIDR0_EL1,         HDFGRTR, nBRBIDR, 0),
+       SR_FGT(SYS_PMCEID0_EL0,         HDFGRTR, PMCEIDn_EL0, 1),
+       SR_FGT(SYS_PMCEID1_EL0,         HDFGRTR, PMCEIDn_EL0, 1),
+       SR_FGT(SYS_PMUSERENR_EL0,       HDFGRTR, PMUSERENR_EL0, 1),
+       SR_FGT(SYS_TRBTRG_EL1,          HDFGRTR, TRBTRG_EL1, 1),
+       SR_FGT(SYS_TRBSR_EL1,           HDFGRTR, TRBSR_EL1, 1),
+       SR_FGT(SYS_TRBPTR_EL1,          HDFGRTR, TRBPTR_EL1, 1),
+       SR_FGT(SYS_TRBMAR_EL1,          HDFGRTR, TRBMAR_EL1, 1),
+       SR_FGT(SYS_TRBLIMITR_EL1,       HDFGRTR, TRBLIMITR_EL1, 1),
+       SR_FGT(SYS_TRBIDR_EL1,          HDFGRTR, TRBIDR_EL1, 1),
+       SR_FGT(SYS_TRBBASER_EL1,        HDFGRTR, TRBBASER_EL1, 1),
+       SR_FGT(SYS_TRCVICTLR,           HDFGRTR, TRCVICTLR, 1),
+       SR_FGT(SYS_TRCSTATR,            HDFGRTR, TRCSTATR, 1),
+       SR_FGT(SYS_TRCSSCSR(0),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(1),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(2),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(3),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(4),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(5),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(6),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(7),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSEQSTR,           HDFGRTR, TRCSEQSTR, 1),
+       SR_FGT(SYS_TRCPRGCTLR,          HDFGRTR, TRCPRGCTLR, 1),
+       SR_FGT(SYS_TRCOSLSR,            HDFGRTR, TRCOSLSR, 1),
+       SR_FGT(SYS_TRCIMSPEC(0),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(1),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(2),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(3),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(4),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(5),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(6),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(7),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCDEVARCH,          HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCDEVID,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR0,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR1,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR2,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR3,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR4,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR5,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR6,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR7,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR8,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR9,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR10,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR11,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR12,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR13,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCCNTVR(0),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCNTVR(1),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCNTVR(2),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCNTVR(3),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCLAIMCLR,         HDFGRTR, TRCCLAIM, 1),
+       SR_FGT(SYS_TRCCLAIMSET,         HDFGRTR, TRCCLAIM, 1),
+       SR_FGT(SYS_TRCAUXCTLR,          HDFGRTR, TRCAUXCTLR, 1),
+       SR_FGT(SYS_TRCAUTHSTATUS,       HDFGRTR, TRCAUTHSTATUS, 1),
+       SR_FGT(SYS_TRCACATR(0),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(1),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(2),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(3),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(4),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(5),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(6),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(7),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(8),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(9),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(10),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(11),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(12),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(13),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(14),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(15),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(0),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(1),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(2),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(3),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(4),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(5),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(6),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(7),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(8),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(9),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(10),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(11),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(12),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(13),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(14),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(15),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCBBCTLR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCCCTLR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCCTLR0,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCCTLR1,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(0),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(1),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(2),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(3),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(4),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(5),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(6),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(7),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(0),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(1),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(2),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(3),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(0),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(1),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(2),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(3),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCONFIGR,          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEVENTCTL0R,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEVENTCTL1R,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(0),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(1),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(2),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(3),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCQCTLR,            HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(2),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(3),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(4),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(5),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(6),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(7),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(8),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(9),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(10),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(11),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(12),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(13),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(14),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(15),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(16),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(17),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(18),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(19),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(20),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(21),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(22),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(23),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(24),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(25),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(26),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(27),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(28),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(29),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(30),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(31),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSR,              HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQEVR(0),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQEVR(1),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQEVR(2),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQRSTEVR,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(0),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(1),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(2),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(3),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(4),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(5),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(6),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(7),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(0),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(1),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(2),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(3),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(4),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(5),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(6),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(7),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSTALLCTLR,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSYNCPR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCTRACEIDR,         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCTSCTLR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVIIECTLR,         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVIPCSSCTLR,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVISSCTLR,         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCCTLR0,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCCTLR1,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(0),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(1),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(2),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(3),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(4),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(5),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(6),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(7),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_PMSLATFR_EL1,        HDFGRTR, PMSLATFR_EL1, 1),
+       SR_FGT(SYS_PMSIRR_EL1,          HDFGRTR, PMSIRR_EL1, 1),
+       SR_FGT(SYS_PMSIDR_EL1,          HDFGRTR, PMSIDR_EL1, 1),
+       SR_FGT(SYS_PMSICR_EL1,          HDFGRTR, PMSICR_EL1, 1),
+       SR_FGT(SYS_PMSFCR_EL1,          HDFGRTR, PMSFCR_EL1, 1),
+       SR_FGT(SYS_PMSEVFR_EL1,         HDFGRTR, PMSEVFR_EL1, 1),
+       SR_FGT(SYS_PMSCR_EL1,           HDFGRTR, PMSCR_EL1, 1),
+       SR_FGT(SYS_PMBSR_EL1,           HDFGRTR, PMBSR_EL1, 1),
+       SR_FGT(SYS_PMBPTR_EL1,          HDFGRTR, PMBPTR_EL1, 1),
+       SR_FGT(SYS_PMBLIMITR_EL1,       HDFGRTR, PMBLIMITR_EL1, 1),
+       SR_FGT(SYS_PMMIR_EL1,           HDFGRTR, PMMIR_EL1, 1),
+       SR_FGT(SYS_PMSELR_EL0,          HDFGRTR, PMSELR_EL0, 1),
+       SR_FGT(SYS_PMOVSCLR_EL0,        HDFGRTR, PMOVS, 1),
+       SR_FGT(SYS_PMOVSSET_EL0,        HDFGRTR, PMOVS, 1),
+       SR_FGT(SYS_PMINTENCLR_EL1,      HDFGRTR, PMINTEN, 1),
+       SR_FGT(SYS_PMINTENSET_EL1,      HDFGRTR, PMINTEN, 1),
+       SR_FGT(SYS_PMCNTENCLR_EL0,      HDFGRTR, PMCNTEN, 1),
+       SR_FGT(SYS_PMCNTENSET_EL0,      HDFGRTR, PMCNTEN, 1),
+       SR_FGT(SYS_PMCCNTR_EL0,         HDFGRTR, PMCCNTR_EL0, 1),
+       SR_FGT(SYS_PMCCFILTR_EL0,       HDFGRTR, PMCCFILTR_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(0),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(1),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(2),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(3),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(4),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(5),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(6),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(7),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(8),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(9),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(10),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(11),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(12),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(13),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(14),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(15),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(16),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(17),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(18),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(19),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(20),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(21),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(22),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(23),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(24),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(25),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(26),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(27),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(28),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(29),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(30),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(0),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(1),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(2),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(3),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(4),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(5),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(6),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(7),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(8),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(9),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(10),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(11),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(12),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(13),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(14),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(15),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(16),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(17),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(18),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(19),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(20),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(21),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(22),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(23),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(24),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(25),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(26),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(27),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(28),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(29),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(30),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_OSDLR_EL1,           HDFGRTR, OSDLR_EL1, 1),
+       SR_FGT(SYS_OSECCR_EL1,          HDFGRTR, OSECCR_EL1, 1),
+       SR_FGT(SYS_OSLSR_EL1,           HDFGRTR, OSLSR_EL1, 1),
+       SR_FGT(SYS_DBGPRCR_EL1,         HDFGRTR, DBGPRCR_EL1, 1),
+       SR_FGT(SYS_DBGAUTHSTATUS_EL1,   HDFGRTR, DBGAUTHSTATUS_EL1, 1),
+       SR_FGT(SYS_DBGCLAIMSET_EL1,     HDFGRTR, DBGCLAIM, 1),
+       SR_FGT(SYS_DBGCLAIMCLR_EL1,     HDFGRTR, DBGCLAIM, 1),
+       SR_FGT(SYS_MDSCR_EL1,           HDFGRTR, MDSCR_EL1, 1),
+       /*
+        * The trap bits capture *64* debug registers per bit, but the
+        * ARM ARM only describes the encoding for the first 16, and
+        * we don't really support more than that anyway.
+        */
+       SR_FGT(SYS_DBGWVRn_EL1(0),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(1),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(2),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(3),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(4),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(5),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(6),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(7),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(8),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(9),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(10),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(11),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(12),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(13),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(14),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(15),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(0),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(1),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(2),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(3),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(4),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(5),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(6),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(7),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(8),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(9),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(10),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(11),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(12),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(13),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(14),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(15),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(0),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(1),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(2),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(3),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(4),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(5),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(6),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(7),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(8),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(9),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(10),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(11),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(12),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(13),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(14),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(15),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(0),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(1),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(2),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(3),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(4),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(5),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(6),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(7),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(8),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(9),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(10),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(11),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(12),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(13),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(14),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(15),     HDFGRTR, DBGBCRn_EL1, 1),
+       /*
+        * HDFGWTR_EL2
+        *
+        * Although HDFGRTR_EL2 and HDFGWTR_EL2 registers largely
+        * overlap in their bit assignment, there are a number of bits
+        * that are RES0 on one side, and an actual trap bit on the
+        * other.  The policy chosen here is to describe all the
+        * read-side mappings, and only the write-side mappings that
+        * differ from the read side, and the trap handler will pick
+        * the correct shadow register based on the access type.
+        */
+       SR_FGT(SYS_TRFCR_EL1,           HDFGWTR, TRFCR_EL1, 1),
+       SR_FGT(SYS_TRCOSLAR,            HDFGWTR, TRCOSLAR, 1),
+       SR_FGT(SYS_PMCR_EL0,            HDFGWTR, PMCR_EL0, 1),
+       SR_FGT(SYS_PMSWINC_EL0,         HDFGWTR, PMSWINC_EL0, 1),
+       SR_FGT(SYS_OSLAR_EL1,           HDFGWTR, OSLAR_EL1, 1),
+};
+
+static union trap_config get_trap_config(u32 sysreg)
+{
+       return (union trap_config) {
+               .val = xa_to_value(xa_load(&sr_forward_xa, sysreg)),
+       };
+}
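For reference, xa_load() returns NULL for an absent index and xa_to_value() decodes a tagged value entry, so an unknown encoding naturally decodes to an all-zero trap_config. A minimal, self-contained sketch of the same value-in-pointer encoding (illustrative helpers modelled on xa_mk_value()/xa_to_value(), not the kernel xarray API itself):

/*
 * Hypothetical illustration: a small integer is shifted and tagged so
 * it can live in a pointer-sized slot, and an empty slot (NULL)
 * round-trips to 0.
 */
#include <stdint.h>
#include <stdio.h>

static void *mk_value(uintptr_t v)   { return (void *)((v << 1) | 1); }
static uintptr_t to_value(void *p)   { return (uintptr_t)p >> 1; }

int main(void)
{
	uintptr_t cfg = 0x2a;		/* packed trap_config (made up) */
	void *slot = mk_value(cfg);	/* what would sit in the xarray */

	printf("decoded: %#lx\n", (unsigned long)to_value(slot));
	printf("empty:   %#lx\n", (unsigned long)to_value(NULL));
	return 0;
}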
+
+static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc,
+                                      const char *type, int err)
+{
+       kvm_err("%s line %d encoding range "
+               "(%d, %d, %d, %d, %d) - (%d, %d, %d, %d, %d) (err=%d)\n",
+               type, tc->line,
+               sys_reg_Op0(tc->encoding), sys_reg_Op1(tc->encoding),
+               sys_reg_CRn(tc->encoding), sys_reg_CRm(tc->encoding),
+               sys_reg_Op2(tc->encoding),
+               sys_reg_Op0(tc->end), sys_reg_Op1(tc->end),
+               sys_reg_CRn(tc->end), sys_reg_CRm(tc->end),
+               sys_reg_Op2(tc->end),
+               err);
+}
+
+int __init populate_nv_trap_config(void)
+{
+       int ret = 0;
+
+       BUILD_BUG_ON(sizeof(union trap_config) != sizeof(void *));
+       BUILD_BUG_ON(__NR_CGT_GROUP_IDS__ > BIT(TC_CGT_BITS));
+       BUILD_BUG_ON(__NR_FGT_GROUP_IDS__ > BIT(TC_FGT_BITS));
+       BUILD_BUG_ON(__NR_FG_FILTER_IDS__ > BIT(TC_FGF_BITS));
+
+       for (int i = 0; i < ARRAY_SIZE(encoding_to_cgt); i++) {
+               const struct encoding_to_trap_config *cgt = &encoding_to_cgt[i];
+               void *prev;
+
+               if (cgt->tc.val & BIT(63)) {
+                       kvm_err("CGT[%d] has MBZ bit set\n", i);
+                       ret = -EINVAL;
+               }
+
+               if (cgt->encoding != cgt->end) {
+                       prev = xa_store_range(&sr_forward_xa,
+                                             cgt->encoding, cgt->end,
+                                             xa_mk_value(cgt->tc.val),
+                                             GFP_KERNEL);
+               } else {
+                       prev = xa_store(&sr_forward_xa, cgt->encoding,
+                                       xa_mk_value(cgt->tc.val), GFP_KERNEL);
+                       if (prev && !xa_is_err(prev)) {
+                               ret = -EINVAL;
+                               print_nv_trap_error(cgt, "Duplicate CGT", ret);
+                       }
+               }
+
+               if (xa_is_err(prev)) {
+                       ret = xa_err(prev);
+                       print_nv_trap_error(cgt, "Failed CGT insertion", ret);
+               }
+       }
+
+       kvm_info("nv: %ld coarse grained trap handlers\n",
+                ARRAY_SIZE(encoding_to_cgt));
+
+       if (!cpus_have_final_cap(ARM64_HAS_FGT))
+               goto check_mcb;
+
+       for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) {
+               const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i];
+               union trap_config tc;
+
+               if (fgt->tc.fgt >= __NR_FGT_GROUP_IDS__) {
+                       ret = -EINVAL;
+                       print_nv_trap_error(fgt, "Invalid FGT", ret);
+               }
+
+               tc = get_trap_config(fgt->encoding);
+
+               if (tc.fgt) {
+                       ret = -EINVAL;
+                       print_nv_trap_error(fgt, "Duplicate FGT", ret);
+               }
+
+               tc.val |= fgt->tc.val;
+               xa_store(&sr_forward_xa, fgt->encoding,
+                        xa_mk_value(tc.val), GFP_KERNEL);
+       }
+
+       kvm_info("nv: %ld fine grained trap handlers\n",
+                ARRAY_SIZE(encoding_to_fgt));
+
+check_mcb:
+       for (int id = __MULTIPLE_CONTROL_BITS__; id < __COMPLEX_CONDITIONS__; id++) {
+               const enum cgt_group_id *cgids;
+
+               cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+
+               for (int i = 0; cgids[i] != __RESERVED__; i++) {
+                       if (cgids[i] >= __MULTIPLE_CONTROL_BITS__) {
+                               kvm_err("Recursive MCB %d/%d\n", id, cgids[i]);
+                               ret = -EINVAL;
+                       }
+               }
+       }
+
+       if (ret)
+               xa_destroy(&sr_forward_xa);
+
+       return ret;
+}
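populate_nv_trap_config() is purely a boot-time validation and indexing pass: it flags MBZ bits, duplicate encodings, out-of-range FGT groups and recursive MCB combos before the tables above are trusted. A toy sketch (assumed, not kernel code) of the duplicate-detection idea on a plain table:

/*
 * Minimal sketch: walk a table of (encoding, config) pairs and reject
 * duplicates, so a copy-paste error in the big arrays fails loudly at
 * init rather than silently overriding an earlier entry.
 */
#include <stdio.h>

struct ent { unsigned int encoding; unsigned long cfg; };

static int validate(const struct ent *t, int n)
{
	for (int i = 0; i < n; i++)
		for (int j = i + 1; j < n; j++)
			if (t[i].encoding == t[j].encoding) {
				fprintf(stderr, "duplicate encoding %#x\n",
					t[i].encoding);
				return -1;
			}
	return 0;
}

int main(void)
{
	const struct ent table[] = {
		{ 0xc010, 1 }, { 0xc011, 2 }, { 0xc010, 3 },	/* dup */
	};

	return validate(table, 3) ? 1 : 0;
}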
+
+static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu,
+                                        const struct trap_bits *tb)
+{
+       enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY;
+       u64 val;
+
+       val = __vcpu_sys_reg(vcpu, tb->index);
+       if ((val & tb->mask) == tb->value)
+               b |= tb->behaviour;
+
+       return b;
+}
+
+static enum trap_behaviour __compute_trap_behaviour(struct kvm_vcpu *vcpu,
+                                                   const enum cgt_group_id id,
+                                                   enum trap_behaviour b)
+{
+       switch (id) {
+               const enum cgt_group_id *cgids;
+
+       case __RESERVED__ ... __MULTIPLE_CONTROL_BITS__ - 1:
+               if (likely(id != __RESERVED__))
+                       b |= get_behaviour(vcpu, &coarse_trap_bits[id]);
+               break;
+       case __MULTIPLE_CONTROL_BITS__ ... __COMPLEX_CONDITIONS__ - 1:
+               /* Yes, this is recursive. Don't do anything stupid. */
+               cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+               for (int i = 0; cgids[i] != __RESERVED__; i++)
+                       b |= __compute_trap_behaviour(vcpu, cgids[i], b);
+               break;
+       default:
+               if (ARRAY_SIZE(ccc))
+                       b |= ccc[id -  __COMPLEX_CONDITIONS__](vcpu);
+               break;
+       }
+
+       return b;
+}
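The recursion above terminates because every __MULTIPLE_CONTROL_BITS__ entry expands into a sentinel-terminated list of simpler groups, and the behaviours are simply OR-accumulated across that list. A hedged standalone sketch (made-up group names and contributions) of the accumulation:

/*
 * Illustrative only: OR-accumulate behaviour bits over a combo list
 * terminated by a reserved sentinel, mirroring the MCB case above.
 */
#include <stdio.h>

enum behaviour { HANDLE_LOCALLY = 0, FWD_READ = 1, FWD_WRITE = 2 };
enum group { RESERVED = 0, G_TVM, G_TRVM, NR_GROUPS };

/* behaviour each simple group would contribute for this vcpu state */
static const int contrib[NR_GROUPS] = {
	[G_TVM]  = FWD_WRITE,
	[G_TRVM] = FWD_READ,
};

int main(void)
{
	static const enum group combo[] = { G_TVM, G_TRVM, RESERVED };
	int b = HANDLE_LOCALLY;

	for (int i = 0; combo[i] != RESERVED; i++)
		b |= contrib[combo[i]];

	printf("behaviour mask: 0x%x\n", (unsigned)b);	/* 0x3: fwd R and W */
	return 0;
}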
+
+static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu,
+                                                 const union trap_config tc)
+{
+       enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY;
+
+       return __compute_trap_behaviour(vcpu, tc.cgt, b);
+}
+
+static bool check_fgt_bit(u64 val, const union trap_config tc)
+{
+       return ((val >> tc.bit) & 1) == tc.pol;
+}
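check_fgt_bit() encodes a polarity with each bit so that negative ("nXXX") trap bits, which trap when clear, and positive bits, which trap when set, go through the same test. A small self-contained illustration with assumed bit positions:

/*
 * Standalone illustration (assumed register value and bit positions)
 * of the bit/polarity test: positive-polarity bits trap when set,
 * negative-polarity bits trap when clear.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool traps(uint64_t reg, unsigned int bit, unsigned int pol)
{
	return ((reg >> bit) & 1) == pol;
}

int main(void)
{
	uint64_t hdfgrtr = 1ULL << 4;		/* example shadow value */

	printf("%d\n", traps(hdfgrtr, 4, 1));	/* positive bit set: trap  */
	printf("%d\n", traps(hdfgrtr, 7, 0));	/* nXXX bit clear:   trap  */
	return 0;
}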
+
+#define sanitised_sys_reg(vcpu, reg)                   \
+       ({                                              \
+               u64 __val;                              \
+               __val = __vcpu_sys_reg(vcpu, reg);      \
+               __val &= ~__ ## reg ## _RES0;           \
+               (__val);                                \
+       })
+
+bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
+{
+       union trap_config tc;
+       enum trap_behaviour b;
+       bool is_read;
+       u32 sysreg;
+       u64 esr, val;
+
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               return false;
+
+       esr = kvm_vcpu_get_esr(vcpu);
+       sysreg = esr_sys64_to_sysreg(esr);
+       is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+
+       tc = get_trap_config(sysreg);
+
+       /*
+        * A value of 0 for the whole entry means that we know nothing
+        * for this sysreg, and that it cannot be re-injected into the
+        * nested hypervisor. In this situation, let's cut it short.
+        *
+        * Note that ultimately, we could also make use of the xarray
+        * to store the index of the sysreg in the local descriptor
+        * array, avoiding another search... Hint, hint...
+        */
+       if (!tc.val)
+               return false;
+
+       switch ((enum fgt_group_id)tc.fgt) {
+       case __NO_FGT_GROUP__:
+               break;
+
+       case HFGxTR_GROUP:
+               if (is_read)
+                       val = sanitised_sys_reg(vcpu, HFGRTR_EL2);
+               else
+                       val = sanitised_sys_reg(vcpu, HFGWTR_EL2);
+               break;
+
+       case HDFGRTR_GROUP:
+       case HDFGWTR_GROUP:
+               if (is_read)
+                       val = sanitised_sys_reg(vcpu, HDFGRTR_EL2);
+               else
+                       val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
+               break;
+
+       case HFGITR_GROUP:
+               val = sanitised_sys_reg(vcpu, HFGITR_EL2);
+               switch (tc.fgf) {
+                       u64 tmp;
+
+               case __NO_FGF__:
+                       break;
+
+               case HCRX_FGTnXS:
+                       tmp = sanitised_sys_reg(vcpu, HCRX_EL2);
+                       if (tmp & HCRX_EL2_FGTnXS)
+                               tc.fgt = __NO_FGT_GROUP__;
+               }
+               break;
+
+       case __NR_FGT_GROUP_IDS__:
+               /* Something is really wrong, bail out */
+               WARN_ONCE(1, "__NR_FGT_GROUP_IDS__");
+               return false;
+       }
+
+       if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(val, tc))
+               goto inject;
+
+       b = compute_trap_behaviour(vcpu, tc);
+
+       if (((b & BEHAVE_FORWARD_READ) && is_read) ||
+           ((b & BEHAVE_FORWARD_WRITE) && !is_read))
+               goto inject;
+
+       return false;
+
+inject:
+       trace_kvm_forward_sysreg_trap(vcpu, sysreg, is_read);
+
+       kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+       return true;
+}
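Putting it together, a trapped access is forwarded to the guest hypervisor when the relevant fine-grained bit matches its polarity, or when the coarse-grained behaviour requests forwarding for that access direction. A rough, self-contained sketch of just that decision (illustrative constants, not the kernel function):

/*
 * Decision sketch: forward when the FGT bit hit, or when the CGT
 * behaviour asks for forwarding in this direction.
 */
#include <stdbool.h>
#include <stdio.h>

#define FWD_READ  0x1
#define FWD_WRITE 0x2

static bool should_forward(bool fgt_hit, int behaviour, bool is_read)
{
	if (fgt_hit)
		return true;
	return (is_read  && (behaviour & FWD_READ)) ||
	       (!is_read && (behaviour & FWD_WRITE));
}

int main(void)
{
	printf("%d\n", should_forward(false, FWD_WRITE, false));	/* 1 */
	printf("%d\n", should_forward(false, FWD_WRITE, true));	/* 0 */
	return 0;
}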
+
 static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
 {
        u64 mode = spsr & PSR_MODE_MASK;
index 20280a5..95f6945 100644 (file)
@@ -884,21 +884,6 @@ u32 __attribute_const__ kvm_target_cpu(void)
        return KVM_ARM_TARGET_GENERIC_V8;
 }
 
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
-{
-       u32 target = kvm_target_cpu();
-
-       memset(init, 0, sizeof(*init));
-
-       /*
-        * For now, we don't return any features.
-        * In future, we might use features to return target
-        * specific features available for the preferred
-        * target type.
-        */
-       init->target = (__u32)target;
-}
-
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        return -EINVAL;
index 6dcd660..617ae6d 100644 (file)
@@ -222,7 +222,33 @@ static int kvm_handle_eret(struct kvm_vcpu *vcpu)
        if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
                return kvm_handle_ptrauth(vcpu);
 
-       kvm_emulate_nested_eret(vcpu);
+       /*
+        * If we got here, two possibilities:
+        *
+        * - the guest is in EL2, and we need to fully emulate ERET
+        *
+        * - the guest is in EL1, and we need to reinject the
+        *   exception into the L1 hypervisor.

+        *
+        * If KVM ever traps ERET for its own use, we'll have to
+        * revisit this.
+        */
+       if (is_hyp_ctxt(vcpu))
+               kvm_emulate_nested_eret(vcpu);
+       else
+               kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+
+       return 1;
+}
+
+static int handle_svc(struct kvm_vcpu *vcpu)
+{
+       /*
+        * So far, SVC traps only for NV via HFGITR_EL2. An SVC from a
+        * 32-bit guest would be caught by vcpu_mode_is_bad_32bit(), so
+        * we should only have to deal with a 64-bit exception.
+        */
+       kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
        return 1;
 }
 
@@ -239,6 +265,7 @@ static exit_handle_fn arm_exit_handlers[] = {
        [ESR_ELx_EC_SMC32]      = handle_smc,
        [ESR_ELx_EC_HVC64]      = handle_hvc,
        [ESR_ELx_EC_SMC64]      = handle_smc,
+       [ESR_ELx_EC_SVC64]      = handle_svc,
        [ESR_ELx_EC_SYS64]      = kvm_handle_sys_reg,
        [ESR_ELx_EC_SVE]        = handle_sve,
        [ESR_ELx_EC_ERET]       = kvm_handle_eret,
index 4bddb85..9cfe6bd 100644 (file)
@@ -70,20 +70,26 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
        }
 }
 
-static inline bool __hfgxtr_traps_required(void)
-{
-       if (cpus_have_final_cap(ARM64_SME))
-               return true;
-
-       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-               return true;
+#define compute_clr_set(vcpu, reg, clr, set)                           \
+       do {                                                            \
+               u64 hfg;                                                \
+               hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0;  \
+               set |= hfg & __ ## reg ## _MASK;                        \
+               clr |= ~hfg & __ ## reg ## _nMASK;                      \
+       } while(0)
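compute_clr_set() turns the guest hypervisor's shadow FGT value into bits to force on (positive-polarity traps it enabled) and bits to force off (negative-polarity traps it enabled), which the callers below then merge with the host's own trap requirements. A hedged userspace sketch with made-up masks:

/*
 * Illustrative only: POS_MASK/NEG_MASK and the shadow value are made
 * up. Positive bits the guest hypervisor set become "set"; negative
 * (nXXX) bits it left clear become "clr"; the programmed value starts
 * from the no-trap default for negative bits.
 */
#include <stdint.h>
#include <stdio.h>

#define POS_MASK  0x00ffULL	/* trap when bit == 1 */
#define NEG_MASK  0xff00ULL	/* trap when bit == 0 */

int main(void)
{
	uint64_t shadow = 0x0f0f;	/* guest hypervisor's shadow value */
	uint64_t set = 0, clr = 0;

	set |= shadow & POS_MASK;	/* positive traps it enabled */
	clr |= ~shadow & NEG_MASK;	/* negative traps it enabled */

	uint64_t val = NEG_MASK;	/* default: negative bits set, no trap */
	val |= set;
	val &= ~clr;

	printf("programmed value: %#llx\n", (unsigned long long)val);
	return 0;
}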
 
-       return false;
-}
 
-static inline void __activate_traps_hfgxtr(void)
+static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
+       struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
        u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+       u64 r_val, w_val;
+
+       if (!cpus_have_final_cap(ARM64_HAS_FGT))
+               return;
+
+       ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
+       ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
 
        if (cpus_have_final_cap(ARM64_SME)) {
                tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
@@ -98,26 +104,72 @@ static inline void __activate_traps_hfgxtr(void)
        if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
                w_set |= HFGxTR_EL2_TCR_EL1_MASK;
 
-       sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
-       sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+       if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+               compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
+               compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
+       }
+
+       /* The default is not to trap anything but ACCDATA_EL1 */
+       r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+       r_val |= r_set;
+       r_val &= ~r_clr;
+
+       w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+       w_val |= w_set;
+       w_val &= ~w_clr;
+
+       write_sysreg_s(r_val, SYS_HFGRTR_EL2);
+       write_sysreg_s(w_val, SYS_HFGWTR_EL2);
+
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               return;
+
+       ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
+
+       r_set = r_clr = 0;
+       compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
+       r_val = __HFGITR_EL2_nMASK;
+       r_val |= r_set;
+       r_val &= ~r_clr;
+
+       write_sysreg_s(r_val, SYS_HFGITR_EL2);
+
+       ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
+       ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
+
+       r_clr = r_set = w_clr = w_set = 0;
+
+       compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
+       compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
+
+       r_val = __HDFGRTR_EL2_nMASK;
+       r_val |= r_set;
+       r_val &= ~r_clr;
+
+       w_val = __HDFGWTR_EL2_nMASK;
+       w_val |= w_set;
+       w_val &= ~w_clr;
+
+       write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
+       write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
 }
 
-static inline void __deactivate_traps_hfgxtr(void)
+static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
-       u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+       struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
 
-       if (cpus_have_final_cap(ARM64_SME)) {
-               tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
+       if (!cpus_have_final_cap(ARM64_HAS_FGT))
+               return;
 
-               r_set |= tmp;
-               w_set |= tmp;
-       }
+       write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
 
-       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-               w_clr |= HFGxTR_EL2_TCR_EL1_MASK;
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               return;
 
-       sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
-       sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
 }
 
 static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -145,8 +197,21 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
        vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 
-       if (__hfgxtr_traps_required())
-               __activate_traps_hfgxtr();
+       if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+               u64 hcrx = HCRX_GUEST_FLAGS;
+               if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+                       u64 clr = 0, set = 0;
+
+                       compute_clr_set(vcpu, HCRX_EL2, clr, set);
+
+                       hcrx |= set;
+                       hcrx &= ~clr;
+               }
+
+               write_sysreg_s(hcrx, SYS_HCRX_EL2);
+       }
+
+       __activate_traps_hfgxtr(vcpu);
 }
 
 static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -162,8 +227,10 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
                vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
        }
 
-       if (__hfgxtr_traps_required())
-               __deactivate_traps_hfgxtr();
+       if (cpus_have_final_cap(ARM64_HAS_HCX))
+               write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
+
+       __deactivate_traps_hfgxtr(vcpu);
 }
 
 static inline void ___activate_traps(struct kvm_vcpu *vcpu)
@@ -177,9 +244,6 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu)
 
        if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
                write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
-       if (cpus_have_final_cap(ARM64_HAS_HCX))
-               write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
@@ -194,9 +258,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
                vcpu->arch.hcr_el2 &= ~HCR_VSE;
                vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
        }
-
-       if (cpus_have_final_cap(ARM64_HAS_HCX))
-               write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
@@ -457,6 +518,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
         */
        val &= ~(TCR_HD | TCR_HA);
        write_sysreg_el1(val, SYS_TCR);
+       __kvm_skip_instr(vcpu);
        return true;
 }
 
index d5ec972..230e4f2 100644 (file)
@@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
 int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
                                  enum kvm_pgtable_prot prot,
                                  unsigned long *haddr);
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
 
 #endif /* __KVM_HYP_MM_H */
index 58dcd92..ab4f5d1 100644 (file)
@@ -705,7 +705,20 @@ int hyp_ffa_init(void *pages)
        if (res.a0 == FFA_RET_NOT_SUPPORTED)
                return 0;
 
-       if (res.a0 != FFA_VERSION_1_0)
+       /*
+        * Firmware returns the maximum supported version of the FF-A
+        * implementation. Check that the returned version is
+        * backwards-compatible with the hyp according to the rules in DEN0077A
+        * v1.1 REL0 13.2.1.
+        *
+        * Of course, things are never simple when dealing with firmware. v1.1
+        * broke ABI with v1.0 on several structures, which is itself
+        * incompatible with the aforementioned versioning scheme. The
+        * expectation is that v1.x implementations that do not support the v1.0
+        * ABI return NOT_SUPPORTED rather than a version number, according to
+        * DEN0077A v1.1 REL0 18.6.4.
+        */
+       if (FFA_MAJOR_VERSION(res.a0) != 1)
                return -EOPNOTSUPP;
 
        arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
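The acceptance test therefore hinges on the major version alone: any 1.x answer is treated as compatible, and 1.x firmware that dropped the 1.0 ABI is expected to answer NOT_SUPPORTED rather than a version number. A standalone illustration, assuming the usual FF-A encoding of major in bits [30:16] and minor in [15:0] (the kernel's FFA_MAJOR_VERSION() macro is not shown in this hunk):

/*
 * Illustrative check only; field layout per the FF-A spec is an
 * assumption here, not taken from the patch.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FFA_MAJOR(v)	(((v) >> 16) & 0x7fff)
#define FFA_MINOR(v)	((v) & 0xffff)

static bool ffa_version_ok(uint32_t resp)
{
	return FFA_MAJOR(resp) == 1;
}

int main(void)
{
	printf("%d\n", ffa_version_ok((1 << 16) | 1));	/* v1.1 -> ok */
	printf("%d\n", ffa_version_ok(2 << 16));	/* v2.0 -> no */
	return 0;
}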
index a169c61..857d9bc 100644 (file)
@@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
        __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
 }
 
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+       DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+       DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+       __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
        DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
        HANDLE_FUNC(__kvm_tlb_flush_vmid),
+       HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
        HANDLE_FUNC(__kvm_flush_cpu_context),
        HANDLE_FUNC(__kvm_timer_set_cntvoff),
        HANDLE_FUNC(__vgic_v3_read_vmcr),
index 318298e..65a7a18 100644 (file)
@@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
        return err;
 }
 
+static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
+{
+       unsigned long cur;
+
+       hyp_assert_lock_held(&pkvm_pgd_lock);
+
+       if (!start || start < __io_map_base)
+               return -EINVAL;
+
+       /* The allocated size is always a multiple of PAGE_SIZE */
+       cur = start + PAGE_ALIGN(size);
+
+       /* Are we overflowing on the vmemmap? */
+       if (cur > __hyp_vmemmap)
+               return -ENOMEM;
+
+       __io_map_base = cur;
+
+       return 0;
+}
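__pkvm_alloc_private_va_range() is a simple bump allocator: the caller picks an aligned start at or above the cursor, the helper rejects anything below the cursor or beyond the vmemmap bound, and advances the cursor on success. A userspace model with assumed constants:

/*
 * Toy model of the bump-allocator contract; addresses, limits and the
 * ALIGN_UP helper are illustrative, not the hyp's real layout.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static uintptr_t io_map_base = 0x10000;		/* allocation cursor */
static const uintptr_t va_limit = 0x20000;	/* "vmemmap" bound   */

static int alloc_private_range(uintptr_t start, size_t size)
{
	if (!start || start < io_map_base)
		return -1;
	uintptr_t end = start + ALIGN_UP(size, PAGE_SIZE);
	if (end > va_limit)
		return -1;			/* would overflow the bound */
	io_map_base = end;
	return 0;
}

int main(void)
{
	uintptr_t start = ALIGN_UP(io_map_base, 2 * PAGE_SIZE);

	printf("%d base=%#lx\n", alloc_private_range(start, 2 * PAGE_SIZE),
	       (unsigned long)io_map_base);
	return 0;
}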
+
 /**
  * pkvm_alloc_private_va_range - Allocates a private VA range.
  * @size:      The size of the VA range to reserve.
@@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
  */
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
 {
-       unsigned long base, addr;
-       int ret = 0;
+       unsigned long addr;
+       int ret;
 
        hyp_spin_lock(&pkvm_pgd_lock);
-
-       /* Align the allocation based on the order of its size */
-       addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
-
-       /* The allocated size is always a multiple of PAGE_SIZE */
-       base = addr + PAGE_ALIGN(size);
-
-       /* Are we overflowing on the vmemmap ? */
-       if (!addr || base > __hyp_vmemmap)
-               ret = -ENOMEM;
-       else {
-               __io_map_base = base;
-               *haddr = addr;
-       }
-
+       addr = __io_map_base;
+       ret = __pkvm_alloc_private_va_range(addr, size);
        hyp_spin_unlock(&pkvm_pgd_lock);
 
+       *haddr = addr;
+
        return ret;
 }
 
@@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
        return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
 }
 
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
+{
+       unsigned long addr, prev_base;
+       size_t size;
+       int ret;
+
+       hyp_spin_lock(&pkvm_pgd_lock);
+
+       prev_base = __io_map_base;
+       /*
+        * Efficient stack verification using the PAGE_SHIFT bit implies
+        * an alignment of our allocation on the order of the size.
+        */
+       size = PAGE_SIZE * 2;
+       addr = ALIGN(__io_map_base, size);
+
+       ret = __pkvm_alloc_private_va_range(addr, size);
+       if (!ret) {
+               /*
+                * Since the stack grows downwards, map the stack to the page
+                * at the higher address and leave the lower guard page
+                * unbacked.
+                *
+                * Any valid stack address now has the PAGE_SHIFT bit as 1
+                * and addresses corresponding to the guard page have the
+                * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+                */
+               ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
+                                         PAGE_SIZE, phys, PAGE_HYP);
+               if (ret)
+                       __io_map_base = prev_base;
+       }
+       hyp_spin_unlock(&pkvm_pgd_lock);
+
+       *haddr = addr + size;
+
+       return ret;
+}
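Because the window is 2 * PAGE_SIZE aligned and only its upper page is backed, bit PAGE_SHIFT cleanly separates valid stack addresses from guard-page addresses, which is what makes the overflow check cheap. A toy model with assumed addresses:

/*
 * Toy model (assumed base address and page size) of the guard-page
 * overflow check: the stack occupies the upper page of a
 * 2*PAGE_SIZE-aligned window, so bit PAGE_SHIFT is 1 for any in-bounds
 * stack address and 0 once the stack has run into the guard page.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

static bool stack_overflowed(uintptr_t sp)
{
	return !(sp & PAGE_SIZE);	/* landed in the guard page */
}

int main(void)
{
	uintptr_t base = 0x40000000;		/* 2*PAGE_SIZE aligned     */
	uintptr_t top  = base + 2 * PAGE_SIZE;	/* would be stack_hyp_va   */

	printf("%d\n", stack_overflowed(top - 16));	/* 0: still on stack */
	printf("%d\n", stack_overflowed(base + 16));	/* 1: guard page hit */
	return 0;
}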
+
 static void *admit_host_page(void *arg)
 {
        struct kvm_hyp_memcache *host_mc = arg;
index bb98630..0d5e0a8 100644 (file)
@@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 
        for (i = 0; i < hyp_nr_cpus; i++) {
                struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
-               unsigned long hyp_addr;
 
                start = (void *)kern_hyp_va(per_cpu_base[i]);
                end = start + PAGE_ALIGN(hyp_percpu_size);
@@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
                if (ret)
                        return ret;
 
-               /*
-                * Allocate a contiguous HYP private VA range for the stack
-                * and guard page. The allocation is also aligned based on
-                * the order of its size.
-                */
-               ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+               ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
                if (ret)
                        return ret;
-
-               /*
-                * Since the stack grows downwards, map the stack to the page
-                * at the higher address and leave the lower guard page
-                * unbacked.
-                *
-                * Any valid stack address now has the PAGE_SHIFT bit as 1
-                * and addresses corresponding to the guard page have the
-                * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-                */
-               hyp_spin_lock(&pkvm_pgd_lock);
-               ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
-                                       PAGE_SIZE, params->stack_pa, PAGE_HYP);
-               hyp_spin_unlock(&pkvm_pgd_lock);
-               if (ret)
-                       return ret;
-
-               /* Update stack_hyp_va to end of the stack's private VA range */
-               params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
        }
 
        /*
index 0a62710..c353a06 100644 (file)
@@ -63,7 +63,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
                __activate_traps_fpsimd32(vcpu);
        }
 
-       write_sysreg(val, cptr_el2);
+       kvm_write_cptr_el2(val);
        write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
 
        if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
@@ -236,7 +236,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
                 * KVM_ARM_VCPU_INIT, however, this is likely not possible for
                 * protected VMs.
                 */
-               vcpu->arch.target = -1;
+               vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
                *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
                *exit_code |= ARM_EXCEPTION_IL;
        }
index b9991bb..1b26571 100644 (file)
@@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t start, unsigned long pages)
+{
+       struct tlb_inv_context cxt;
+       unsigned long stride;
+
+       /*
+        * Since the range of addresses may not be mapped at
+        * the same level, assume the worst case and use PAGE_SIZE as the stride.
+        */
+       stride = PAGE_SIZE;
+       start = round_down(start, stride);
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt, false);
+
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+       dsb(ish);
+       __tlbi(vmalle1is);
+       dsb(ish);
+       isb();
+
+       /* See the comment in __kvm_tlb_flush_vmid_ipa() */
+       if (icache_is_vpipt())
+               icache_inval_all_pou();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
index f7a93ef..f155b8c 100644 (file)
@@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
        return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
 }
 
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t addr, size_t size)
+{
+       unsigned long pages, inval_pages;
+
+       if (!system_supports_tlb_range()) {
+               kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+               return;
+       }
+
+       pages = size >> PAGE_SHIFT;
+       while (pages > 0) {
+               inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
+               kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
+
+               addr += inval_pages << PAGE_SHIFT;
+               pages -= inval_pages;
+       }
+}
+
 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
 
 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
                 * evicted pte value (if any).
                 */
                if (kvm_pte_table(ctx->old, ctx->level))
-                       kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+                       kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+                                               kvm_granule_size(ctx->level));
                else if (kvm_pte_valid(ctx->old))
                        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
                                     ctx->addr, ctx->level);
@@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
        smp_store_release(ctx->ptep, new);
 }
 
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
-                          struct kvm_pgtable_mm_ops *mm_ops)
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
+{
+       /*
+        * If FEAT_TLBIRANGE is implemented, defer the individual
+        * TLB invalidations until the entire walk is finished, and
+        * then use the range-based TLBI instructions to do the
+        * invalidations. Only defer the invalidations when the system
+        * also supports FWB, as the optimization is pointless if the
+        * unmap walker has to perform CMOs.
+        */
+       return system_supports_tlb_range() && stage2_has_fwb(pgt);
+}
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+                               struct kvm_s2_mmu *mmu,
+                               struct kvm_pgtable_mm_ops *mm_ops)
 {
+       struct kvm_pgtable *pgt = ctx->arg;
+
        /*
-        * Clear the existing PTE, and perform break-before-make with
-        * TLB maintenance if it was valid.
+        * Clear the existing PTE, and perform break-before-make if it was
+        * valid. Depending on system support, the TLB maintenance may be
+        * deferred until the entire unmap walk has completed.
         */
        if (kvm_pte_valid(ctx->old)) {
                kvm_clear_pte(ctx->ptep);
-               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+               if (!stage2_unmap_defer_tlb_flush(pgt))
+                       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+                                       ctx->addr, ctx->level);
        }
 
        mm_ops->put_page(ctx->ptep);
@@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
         * block entry and rely on the remaining portions being faulted
         * back lazily.
         */
-       stage2_put_pte(ctx, mmu, mm_ops);
+       stage2_unmap_put_pte(ctx, mmu, mm_ops);
 
        if (need_flush && mm_ops->dcache_clean_inval_poc)
                mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
+       int ret;
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_unmap_walker,
                .arg    = pgt,
                .flags  = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
        };
 
-       return kvm_pgtable_walk(pgt, addr, size, &walker);
+       ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+       if (stage2_unmap_defer_tlb_flush(pgt))
+               /* Perform the deferred TLB invalidations */
+               kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+       return ret;
 }
 
 struct stage2_attr_data {
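Taken together, the hunks above change the stage-2 unmap flow so that, when FEAT_TLBIRANGE and FWB are both present, per-PTE invalidations are skipped during the walk and one range invalidation (chunked to MAX_TLBI_RANGE_PAGES) is issued afterwards. A condensed sketch of that flow, assuming the helpers named in the diff behave as shown and using a hypothetical walk_and_clear_ptes() stand-in for the real walker:

        /* Sketch only: defer per-PTE TLBIs and flush the whole range once. */
        static int unmap_range_sketch(struct kvm_pgtable *pgt, u64 addr, u64 size)
        {
                int ret = walk_and_clear_ptes(pgt, addr, size); /* hypothetical */

                if (system_supports_tlb_range() && stage2_has_fwb(pgt)) {
                        u64 pages = size >> PAGE_SHIFT;

                        while (pages) {
                                u64 n = min(pages, (u64)MAX_TLBI_RANGE_PAGES);

                                kvm_call_hyp(__kvm_tlb_flush_vmid_range,
                                             pgt->mmu, addr, n);
                                addr += n << PAGE_SHIFT;
                                pages -= n;
                        }
                }

                return ret;
        }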
index e69da55..46bd43f 100644 (file)
@@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t start, unsigned long pages)
+{
+       struct tlb_inv_context cxt;
+       unsigned long stride;
+
+       /*
+        * Since the range of addresses may not be mapped at
+        * the same level, assume the worst case as PAGE_SIZE
+        */
+        * the same level, assume the worst case and use PAGE_SIZE as the stride.
+       start = round_down(start, stride);
+
+       dsb(ishst);
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt);
+
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+       dsb(ish);
+       __tlbi(vmalle1is);
+       dsb(ish);
+       isb();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
index d3b4fee..587a104 100644 (file)
@@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 }
 
 /**
- * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
  * @kvm:       pointer to kvm structure.
  *
  * Interface to HYP function to flush all VM TLB entries
  */
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
-       ++kvm->stat.generic.remote_tlb_flush_requests;
        kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+       return 0;
+}
+
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+                                     gfn_t gfn, u64 nr_pages)
+{
+       kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
+                               gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+       return 0;
 }
 
 static bool kvm_is_device_pfn(unsigned long pfn)
@@ -592,6 +600,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
        return 0;
 }
 
+static int __hyp_alloc_private_va_range(unsigned long base)
+{
+       lockdep_assert_held(&kvm_hyp_pgd_mutex);
+
+       if (!PAGE_ALIGNED(base))
+               return -EINVAL;
+
+       /*
+        * Verify that BIT(VA_BITS - 1) hasn't been flipped by
+        * allocating the new area, as it would indicate we've
+        * overflowed the idmap/IO address range.
+        */
+       if ((base ^ io_map_base) & BIT(VA_BITS - 1))
+               return -ENOMEM;
+
+       io_map_base = base;
+
+       return 0;
+}
 
 /**
  * hyp_alloc_private_va_range - Allocates a private VA range.
@@ -612,26 +639,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
 
        /*
         * This assumes that we have enough space below the idmap
-        * page to allocate our VAs. If not, the check below will
-        * kick. A potential alternative would be to detect that
-        * overflow and switch to an allocation above the idmap.
+        * page to allocate our VAs. If not, the check in
+        * __hyp_alloc_private_va_range() will kick. A potential
+        * alternative would be to detect that overflow and switch
+        * to an allocation above the idmap.
         *
         * The allocated size is always a multiple of PAGE_SIZE.
         */
-       base = io_map_base - PAGE_ALIGN(size);
-
-       /* Align the allocation based on the order of its size */
-       base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
-
-       /*
-        * Verify that BIT(VA_BITS - 1) hasn't been flipped by
-        * allocating the new area, as it would indicate we've
-        * overflowed the idmap/IO address range.
-        */
-       if ((base ^ io_map_base) & BIT(VA_BITS - 1))
-               ret = -ENOMEM;
-       else
-               *haddr = io_map_base = base;
+       size = PAGE_ALIGN(size);
+       base = io_map_base - size;
+       ret = __hyp_alloc_private_va_range(base);
 
        mutex_unlock(&kvm_hyp_pgd_mutex);
 
@@ -668,6 +685,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
        return ret;
 }
 
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
+{
+       unsigned long base;
+       size_t size;
+       int ret;
+
+       mutex_lock(&kvm_hyp_pgd_mutex);
+       /*
+        * Efficient stack verification using the PAGE_SHIFT bit implies
+        * an alignment of our allocation on the order of the size.
+        */
+       size = PAGE_SIZE * 2;
+       base = ALIGN_DOWN(io_map_base - size, size);
+
+       ret = __hyp_alloc_private_va_range(base);
+
+       mutex_unlock(&kvm_hyp_pgd_mutex);
+
+       if (ret) {
+               kvm_err("Cannot allocate hyp stack guard page\n");
+               return ret;
+       }
+
+       /*
+        * Since the stack grows downwards, map the stack to the page
+        * at the higher address and leave the lower guard page
+        * unbacked.
+        *
+        * Any valid stack address now has the PAGE_SHIFT bit as 1
+        * and addresses corresponding to the guard page have the
+        * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+        */
+       ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
+                                   PAGE_HYP);
+       if (ret)
+               kvm_err("Cannot map hyp stack\n");
+
+       *haddr = base + size;
+
+       return ret;
+}
+
 /**
  * create_hyp_io_mappings - Map IO into both kernel and HYP
  * @phys_addr: The physical start address which gets mapped
@@ -1075,7 +1134,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
        write_lock(&kvm->mmu_lock);
        stage2_wp_range(&kvm->arch.mmu, start, end);
        write_unlock(&kvm->mmu_lock);
-       kvm_flush_remote_tlbs(kvm);
+       kvm_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 /**
@@ -1541,7 +1600,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 out_unlock:
        read_unlock(&kvm->mmu_lock);
-       kvm_set_pfn_accessed(pfn);
        kvm_release_pfn_clean(pfn);
        return ret != -EAGAIN ? ret : 0;
 }
@@ -1721,7 +1779,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-       kvm_pfn_t pfn = pte_pfn(range->pte);
+       kvm_pfn_t pfn = pte_pfn(range->arg.pte);
 
        if (!kvm->arch.mmu.pgt)
                return false;
index 315354d..042695a 100644 (file)
@@ -71,8 +71,9 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
                break;
 
        case SYS_ID_AA64MMFR0_EL1:
-               /* Hide ECV, FGT, ExS, Secure Memory */
-               val &= ~(GENMASK_ULL(63, 43)            |
+               /* Hide ECV, ExS, Secure Memory */
+               val &= ~(NV_FTR(MMFR0, ECV)             |
+                        NV_FTR(MMFR0, EXS)             |
                         NV_FTR(MMFR0, TGRAN4_2)        |
                         NV_FTR(MMFR0, TGRAN16_2)       |
                         NV_FTR(MMFR0, TGRAN64_2)       |
@@ -116,7 +117,8 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
                break;
 
        case SYS_ID_AA64MMFR1_EL1:
-               val &= (NV_FTR(MMFR1, PAN)      |
+               val &= (NV_FTR(MMFR1, HCX)      |
+                       NV_FTR(MMFR1, PAN)      |
                        NV_FTR(MMFR1, LO)       |
                        NV_FTR(MMFR1, HPDS)     |
                        NV_FTR(MMFR1, VH)       |
@@ -124,8 +126,7 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
                break;
 
        case SYS_ID_AA64MMFR2_EL1:
-               val &= ~(NV_FTR(MMFR2, EVT)     |
-                        NV_FTR(MMFR2, BBM)     |
+               val &= ~(NV_FTR(MMFR2, BBM)     |
                         NV_FTR(MMFR2, TTL)     |
                         GENMASK_ULL(47, 44)    |
                         NV_FTR(MMFR2, ST)      |
index 5606509..6b066e0 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/kvm_emulate.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_vgic.h>
+#include <asm/arm_pmuv3.h>
 
 #define PERF_ATTR_CFG1_COUNTER_64BIT   BIT(0)
 
@@ -35,12 +36,8 @@ static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
        return &vcpu->arch.pmu.pmc[cnt_idx];
 }
 
-static u32 kvm_pmu_event_mask(struct kvm *kvm)
+static u32 __kvm_pmu_event_mask(unsigned int pmuver)
 {
-       unsigned int pmuver;
-
-       pmuver = kvm->arch.arm_pmu->pmuver;
-
        switch (pmuver) {
        case ID_AA64DFR0_EL1_PMUVer_IMP:
                return GENMASK(9, 0);
@@ -55,6 +52,14 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
        }
 }
 
+static u32 kvm_pmu_event_mask(struct kvm *kvm)
+{
+       u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1);
+       u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);
+
+       return __kvm_pmu_event_mask(pmuver);
+}
+
 /**
  * kvm_pmc_is_64bit - determine if counter is 64bit
  * @pmc: counter context
@@ -672,8 +677,11 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
 {
        struct arm_pmu_entry *entry;
 
-       if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
-           pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+       /*
+        * Check the sanitised PMU version for the system, as KVM does not
+        * support implementations where PMUv3 exists on a subset of CPUs.
+        */
+       if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
                return;
 
        mutex_lock(&arm_pmus_lock);
@@ -750,11 +758,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
        } else {
                val = read_sysreg(pmceid1_el0);
                /*
-                * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
+                * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
                 * as RAZ
                 */
-               if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
-                       val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
+               val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
+                        BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
+                        BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
                base = 32;
        }
 
@@ -950,11 +959,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
                return 0;
        }
        case KVM_ARM_VCPU_PMU_V3_FILTER: {
+               u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
                struct kvm_pmu_event_filter __user *uaddr;
                struct kvm_pmu_event_filter filter;
                int nr_events;
 
-               nr_events = kvm_pmu_event_mask(kvm) + 1;
+               /*
+                * Allow userspace to specify an event filter for the entire
+                * event range supported by the hardware's PMUVer, rather
+                * than the guest's, to preserve KVM backward compatibility.
+                */
+               nr_events = __kvm_pmu_event_mask(pmuver) + 1;
 
                uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
 
index 121f1a1..0eea225 100644 (file)
@@ -236,3 +236,21 @@ bool kvm_set_pmuserenr(u64 val)
        ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
        return true;
 }
+
+/*
+ * If we interrupted the guest to update the host PMU context, make
+ * sure we re-apply the guest EL0 state.
+ */
+void kvm_vcpu_pmu_resync_el0(void)
+{
+       struct kvm_vcpu *vcpu;
+
+       if (!has_vhe() || !in_interrupt())
+               return;
+
+       vcpu = kvm_get_running_vcpu();
+       if (!vcpu)
+               return;
+
+       kvm_make_request(KVM_REQ_RESYNC_PMU_EL0, vcpu);
+}
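kvm_make_request() here only marks the vCPU; the flag is expected to be consumed on the next guest entry via the usual vCPU request pattern. A minimal sketch of that consumer side, assuming KVM_REQ_RESYNC_PMU_EL0 is handled like other vCPU requests (the restore helper named below is hypothetical):

        /* Sketch of the consumer side of the request (illustration only). */
        if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
                kvm_pmu_restore_guest_el0_state(vcpu);  /* hypothetical helper */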
index bc8556b..7a65a35 100644 (file)
@@ -248,21 +248,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
                }
        }
 
-       switch (vcpu->arch.target) {
-       default:
-               if (vcpu_el1_is_32bit(vcpu)) {
-                       pstate = VCPU_RESET_PSTATE_SVC;
-               } else if (vcpu_has_nv(vcpu)) {
-                       pstate = VCPU_RESET_PSTATE_EL2;
-               } else {
-                       pstate = VCPU_RESET_PSTATE_EL1;
-               }
-
-               if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-               break;
+       if (vcpu_el1_is_32bit(vcpu))
+               pstate = VCPU_RESET_PSTATE_SVC;
+       else if (vcpu_has_nv(vcpu))
+               pstate = VCPU_RESET_PSTATE_EL2;
+       else
+               pstate = VCPU_RESET_PSTATE_EL1;
+
+       if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
+               ret = -EINVAL;
+               goto out;
        }
 
        /* Reset core registers */
index 2ca2973..e92ec81 100644 (file)
@@ -2151,6 +2151,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
        { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
 
+       { SYS_DESC(SYS_ACCDATA_EL1), undef_access },
+
        { SYS_DESC(SYS_SCXTNUM_EL1), undef_access },
 
        { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
@@ -2365,8 +2367,13 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
        EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
        EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
        EL2_REG(HACR_EL2, access_rw, reset_val, 0),
 
+       EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
+
        EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
        EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
        EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
@@ -2374,6 +2381,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
 
        { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+       EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
        EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
        EL2_REG(ELR_EL2, access_rw, reset_val, 0),
        { SYS_DESC(SYS_SP_EL1), access_sp_el1},
@@ -3170,6 +3179,9 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 
        trace_kvm_handle_sys_reg(esr);
 
+       if (__check_nv_sr_forward(vcpu))
+               return 1;
+
        params = esr_sys64_to_params(esr);
        params.regval = vcpu_get_reg(vcpu, Rt);
 
@@ -3587,5 +3599,8 @@ int __init kvm_sys_reg_table_init(void)
        if (!first_idreg)
                return -EINVAL;
 
+       if (kvm_get_mode() == KVM_MODE_NV)
+               return populate_nv_trap_config();
+
        return 0;
 }
index 6ce5c02..8ad5310 100644 (file)
@@ -364,6 +364,32 @@ TRACE_EVENT(kvm_inject_nested_exception,
                  __entry->hcr_el2)
 );
 
+TRACE_EVENT(kvm_forward_sysreg_trap,
+           TP_PROTO(struct kvm_vcpu *vcpu, u32 sysreg, bool is_read),
+           TP_ARGS(vcpu, sysreg, is_read),
+
+           TP_STRUCT__entry(
+               __field(u64,    pc)
+               __field(u32,    sysreg)
+               __field(bool,   is_read)
+           ),
+
+           TP_fast_assign(
+               __entry->pc = *vcpu_pc(vcpu);
+               __entry->sysreg = sysreg;
+               __entry->is_read = is_read;
+           ),
+
+           TP_printk("%llx %c (%d,%d,%d,%d,%d)",
+                     __entry->pc,
+                     __entry->is_read ? 'R' : 'W',
+                     sys_reg_Op0(__entry->sysreg),
+                     sys_reg_Op1(__entry->sysreg),
+                     sys_reg_CRn(__entry->sysreg),
+                     sys_reg_CRm(__entry->sysreg),
+                     sys_reg_Op2(__entry->sysreg))
+);
+
 #endif /* _TRACE_ARM_ARM64_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
index f9923be..0ab09b0 100644 (file)
@@ -199,7 +199,6 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
                         int offset, u32 *val);
@@ -233,7 +232,6 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_enable(struct kvm_vcpu *vcpu);
index c80ed4f..c3f06fd 100644 (file)
@@ -26,6 +26,7 @@ HAS_ECV
 HAS_ECV_CNTPOFF
 HAS_EPAN
 HAS_EVT
+HAS_FGT
 HAS_GENERIC_AUTH
 HAS_GENERIC_AUTH_ARCH_QARMA3
 HAS_GENERIC_AUTH_ARCH_QARMA5
index 65866bf..2517ef7 100644 (file)
@@ -2156,6 +2156,135 @@ Field   1       ICIALLU
 Field  0       ICIALLUIS
 EndSysreg
 
+Sysreg HDFGRTR_EL2     3       4       3       1       4
+Field  63      PMBIDR_EL1
+Field  62      nPMSNEVFR_EL1
+Field  61      nBRBDATA
+Field  60      nBRBCTL
+Field  59      nBRBIDR
+Field  58      PMCEIDn_EL0
+Field  57      PMUSERENR_EL0
+Field  56      TRBTRG_EL1
+Field  55      TRBSR_EL1
+Field  54      TRBPTR_EL1
+Field  53      TRBMAR_EL1
+Field  52      TRBLIMITR_EL1
+Field  51      TRBIDR_EL1
+Field  50      TRBBASER_EL1
+Res0   49
+Field  48      TRCVICTLR
+Field  47      TRCSTATR
+Field  46      TRCSSCSRn
+Field  45      TRCSEQSTR
+Field  44      TRCPRGCTLR
+Field  43      TRCOSLSR
+Res0   42
+Field  41      TRCIMSPECn
+Field  40      TRCID
+Res0   39:38
+Field  37      TRCCNTVRn
+Field  36      TRCCLAIM
+Field  35      TRCAUXCTLR
+Field  34      TRCAUTHSTATUS
+Field  33      TRC
+Field  32      PMSLATFR_EL1
+Field  31      PMSIRR_EL1
+Field  30      PMSIDR_EL1
+Field  29      PMSICR_EL1
+Field  28      PMSFCR_EL1
+Field  27      PMSEVFR_EL1
+Field  26      PMSCR_EL1
+Field  25      PMBSR_EL1
+Field  24      PMBPTR_EL1
+Field  23      PMBLIMITR_EL1
+Field  22      PMMIR_EL1
+Res0   21:20
+Field  19      PMSELR_EL0
+Field  18      PMOVS
+Field  17      PMINTEN
+Field  16      PMCNTEN
+Field  15      PMCCNTR_EL0
+Field  14      PMCCFILTR_EL0
+Field  13      PMEVTYPERn_EL0
+Field  12      PMEVCNTRn_EL0
+Field  11      OSDLR_EL1
+Field  10      OSECCR_EL1
+Field  9       OSLSR_EL1
+Res0   8
+Field  7       DBGPRCR_EL1
+Field  6       DBGAUTHSTATUS_EL1
+Field  5       DBGCLAIM
+Field  4       MDSCR_EL1
+Field  3       DBGWVRn_EL1
+Field  2       DBGWCRn_EL1
+Field  1       DBGBVRn_EL1
+Field  0       DBGBCRn_EL1
+EndSysreg
+
+Sysreg HDFGWTR_EL2     3       4       3       1       5
+Res0   63
+Field  62      nPMSNEVFR_EL1
+Field  61      nBRBDATA
+Field  60      nBRBCTL
+Res0   59:58
+Field  57      PMUSERENR_EL0
+Field  56      TRBTRG_EL1
+Field  55      TRBSR_EL1
+Field  54      TRBPTR_EL1
+Field  53      TRBMAR_EL1
+Field  52      TRBLIMITR_EL1
+Res0   51
+Field  50      TRBBASER_EL1
+Field  49      TRFCR_EL1
+Field  48      TRCVICTLR
+Res0   47
+Field  46      TRCSSCSRn
+Field  45      TRCSEQSTR
+Field  44      TRCPRGCTLR
+Res0   43
+Field  42      TRCOSLAR
+Field  41      TRCIMSPECn
+Res0   40:38
+Field  37      TRCCNTVRn
+Field  36      TRCCLAIM
+Field  35      TRCAUXCTLR
+Res0   34
+Field  33      TRC
+Field  32      PMSLATFR_EL1
+Field  31      PMSIRR_EL1
+Res0   30
+Field  29      PMSICR_EL1
+Field  28      PMSFCR_EL1
+Field  27      PMSEVFR_EL1
+Field  26      PMSCR_EL1
+Field  25      PMBSR_EL1
+Field  24      PMBPTR_EL1
+Field  23      PMBLIMITR_EL1
+Res0   22
+Field  21      PMCR_EL0
+Field  20      PMSWINC_EL0
+Field  19      PMSELR_EL0
+Field  18      PMOVS
+Field  17      PMINTEN
+Field  16      PMCNTEN
+Field  15      PMCCNTR_EL0
+Field  14      PMCCFILTR_EL0
+Field  13      PMEVTYPERn_EL0
+Field  12      PMEVCNTRn_EL0
+Field  11      OSDLR_EL1
+Field  10      OSECCR_EL1
+Res0   9
+Field  8       OSLAR_EL1
+Field  7       DBGPRCR_EL1
+Res0   6
+Field  5       DBGCLAIM
+Field  4       MDSCR_EL1
+Field  3       DBGWVRn_EL1
+Field  2       DBGWCRn_EL1
+Field  1       DBGBVRn_EL1
+Field  0       DBGBCRn_EL1
+EndSysreg
+
 Sysreg ZCR_EL2 3       4       1       2       0
 Fields ZCR_ELx
 EndSysreg
index d1978e0..47e3801 100644 (file)
@@ -634,7 +634,6 @@ ia64_imva (void *addr)
 
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 #define PREFETCH_STRIDE                        L1_CACHE_BYTES
 
 static inline void
@@ -649,8 +648,6 @@ prefetchw (const void *x)
        ia64_lfetch_excl(ia64_lfhint_none, x);
 }
 
-#define spin_lock_prefetch(x)  prefetchw(x)
-
 extern unsigned long boot_option_idle_override;
 
 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT,
index e71d5bf..465759f 100644 (file)
@@ -662,5 +662,3 @@ source "kernel/power/Kconfig"
 source "drivers/acpi/Kconfig"
 
 endmenu
-
-source "drivers/firmware/Kconfig"
index b1e5db5..ef87bab 100644 (file)
@@ -83,8 +83,8 @@ KBUILD_CFLAGS_KERNEL          += -fPIE
 LDFLAGS_vmlinux                        += -static -pie --no-dynamic-linker -z notext
 endif
 
-cflags-y += -ffreestanding
 cflags-y += $(call cc-option, -mno-check-zero-division)
+cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
 
 load-y         = 0x9000000000200000
 bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
index 6b222f2..93783fa 100644 (file)
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += dma-contiguous.h
-generic-y += export.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h
index b541f62..c2d8962 100644 (file)
@@ -173,16 +173,30 @@ static inline void restore_fp(struct task_struct *tsk)
                _restore_fp(&tsk->thread.fpu);
 }
 
-static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
+static inline void save_fpu_regs(struct task_struct *tsk)
 {
+       unsigned int euen;
+
        if (tsk == current) {
                preempt_disable();
-               if (is_fpu_owner())
+
+               euen = csr_read32(LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+               if (euen & CSR_EUEN_LASXEN)
+                       _save_lasx(&current->thread.fpu);
+               else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+               if (euen & CSR_EUEN_LSXEN)
+                       _save_lsx(&current->thread.fpu);
+               else
+#endif
+               if (euen & CSR_EUEN_FPEN)
                        _save_fp(&current->thread.fpu);
+
                preempt_enable();
        }
-
-       return tsk->thread.fpu.fpr;
 }
 
 static inline int is_simd_owner(void)
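save_fpu_regs() snapshots whichever FP/SIMD unit (LASX, then LSX, then plain FP) is currently enabled in CSR.EUEN into thread.fpu, which is why the ptrace regset getters in the later hunks call it before copying registers out. A rough sketch of that call pattern, with the regset plumbing reduced to a plain copy:

        /* Sketch: flush live FPU state to memory before reading it. */
        static int fpr_read_sketch(struct task_struct *target, void *buf, size_t len)
        {
                save_fpu_regs(target);          /* no-op unless target == current */
                memcpy(buf, &target->thread.fpu.fpr, len);
                return 0;
        }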
index 35f0958..f3ddaed 100644 (file)
@@ -162,7 +162,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val
 #define instruction_pointer(regs) ((regs)->csr_era)
 #define profile_pc(regs) instruction_pointer(regs)
 
-extern void die(const char *, struct pt_regs *) __noreturn;
+extern void die(const char *str, struct pt_regs *regs);
 
 static inline void die_if_kernel(const char *str, struct pt_regs *regs)
 {
index 416b653..66ecb48 100644 (file)
@@ -98,8 +98,6 @@ static inline void __cpu_die(unsigned int cpu)
 {
        loongson_cpu_die(cpu);
 }
-
-extern void __noreturn play_dead(void);
 #endif
 
 #endif /* __ASM_SMP_H */
index f3df5f0..501094a 100644 (file)
@@ -6,12 +6,12 @@
  *
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/asm-offsets.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 #include <asm/regdef.h>
index 021b59c..fc55c4d 100644 (file)
@@ -207,8 +207,7 @@ static int hw_breakpoint_control(struct perf_event *bp,
                        write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
                } else {
                        ctrl = encode_ctrl_reg(info->ctrl);
-                       write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE |
-                                    1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn);
+                       write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE);
                }
                enable = csr_read64(LOONGARCH_CSR_CRMD);
                csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD);
index cb8e580..3015896 100644 (file)
@@ -5,7 +5,7 @@
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/ftrace.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
index e16ab0b..482aa55 100644 (file)
@@ -3,7 +3,6 @@
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
 
-#include <asm/export.h>
 #include <asm/ftrace.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
index 2e04eb0..4ee1e9d 100644 (file)
@@ -61,13 +61,6 @@ EXPORT_SYMBOL(__stack_chk_guard);
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-#ifdef CONFIG_HOTPLUG_CPU
-void __noreturn arch_cpu_idle_dead(void)
-{
-       play_dead();
-}
-#endif
-
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
index a0767c3..f72adbf 100644 (file)
@@ -147,6 +147,8 @@ static int fpr_get(struct task_struct *target,
 {
        int r;
 
+       save_fpu_regs(target);
+
        if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
                r = gfpr_get(target, &to);
        else
@@ -278,6 +280,8 @@ static int simd_get(struct task_struct *target,
 {
        const unsigned int wr_size = NUM_FPU_REGS * regset->size;
 
+       save_fpu_regs(target);
+
        if (!tsk_used_math(target)) {
                /* The task hasn't used FP or LSX, fill with 0xff */
                copy_pad_fprs(target, regset, &to, 0);
index 8ea1bbc..6667b0a 100644 (file)
@@ -317,7 +317,7 @@ void loongson_cpu_die(unsigned int cpu)
        mb();
 }
 
-void play_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
 {
        register uint64_t addr;
        register void (*init_fn)(void);
index 8fb5e7a..89699db 100644 (file)
@@ -383,16 +383,15 @@ void show_registers(struct pt_regs *regs)
 
 static DEFINE_RAW_SPINLOCK(die_lock);
 
-void __noreturn die(const char *str, struct pt_regs *regs)
+void die(const char *str, struct pt_regs *regs)
 {
+       int ret;
        static int die_counter;
-       int sig = SIGSEGV;
 
        oops_enter();
 
-       if (notify_die(DIE_OOPS, str, regs, 0, current->thread.trap_nr,
-                      SIGSEGV) == NOTIFY_STOP)
-               sig = 0;
+       ret = notify_die(DIE_OOPS, str, regs, 0,
+                        current->thread.trap_nr, SIGSEGV);
 
        console_verbose();
        raw_spin_lock_irq(&die_lock);
@@ -405,6 +404,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 
        oops_exit();
 
+       if (ret == NOTIFY_STOP)
+               return;
+
        if (regs && kexec_should_crash(current))
                crash_kexec(regs);
 
@@ -414,7 +416,7 @@ void __noreturn die(const char *str, struct pt_regs *regs)
        if (panic_on_oops)
                panic("Fatal exception");
 
-       make_task_dead(sig);
+       make_task_dead(SIGSEGV);
 }
 
 static inline void setup_vint_size(unsigned int size)
index 9dcf717..0790ead 100644 (file)
@@ -3,12 +3,12 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .irp to, 0, 1, 2, 3, 4, 5, 6, 7
index fecd08c..bfe3d27 100644 (file)
@@ -3,12 +3,12 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .irp to, 0, 1, 2, 3, 4, 5, 6, 7
index 39ce662..cc30b3b 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 SYM_FUNC_START(memcpy)
index 45b725b..7dc76d1 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 SYM_FUNC_START(memmove)
index b39c619..3f20f79 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .macro fill_to_64 r0
index 9177fd6..185f82d 100644 (file)
@@ -9,7 +9,6 @@
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .L_fixup_handle_unaligned:
index 4c874a7..7ad7655 100644 (file)
@@ -2,9 +2,9 @@
 /*
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/page.h>
 #include <asm/regdef.h>
 
index 4ad7870..ca17dd3 100644 (file)
@@ -3,7 +3,6 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/loongarch.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
index 04cedf9..54a85f1 100644 (file)
@@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-int kvm_arch_flush_remote_tlb(struct kvm *kvm);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
 
 #endif /* __MIPS_KVM_HOST_H__ */
index 9151dcd..af9cea2 100644 (file)
@@ -58,8 +58,6 @@
 
 #define cpu_has_rixi           (cpu_data[0].cputype != CPU_CAVIUM_OCTEON)
 
-#define ARCH_HAS_SPINLOCK_PREFETCH 1
-#define spin_lock_prefetch(x) prefetch(x)
 #define PREFETCH_STRIDE 128
 
 #ifdef __OCTEON__
index aa5583a..231ac05 100644 (file)
@@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
        /* Flush slot from GPA */
        kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
                              slot->base_gfn + slot->npages - 1);
-       kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+       kvm_flush_remote_tlbs_memslot(kvm, slot);
        spin_unlock(&kvm->mmu_lock);
 }
 
@@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
                                        new->base_gfn + new->npages - 1);
                if (needs_flush)
-                       kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+                       kvm_flush_remote_tlbs_memslot(kvm, new);
                spin_unlock(&kvm->mmu_lock);
        }
 }
@@ -981,18 +981,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        kvm_mips_callbacks->prepare_flush_shadow(kvm);
        return 1;
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
        int r;
index e8c0898..7b2ac13 100644 (file)
@@ -447,7 +447,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
        gpa_t gpa = range->start << PAGE_SHIFT;
-       pte_t hva_pte = range->pte;
+       pte_t hva_pte = range->arg.pte;
        pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
        pte_t old_pte;
 
index 1401e4c..bf2b21b 100644 (file)
@@ -2,7 +2,7 @@
 #
 config LIGHTWEIGHT_SPINLOCK_CHECK
        bool "Enable lightweight spinlock checks"
-       depends on SMP && !DEBUG_SPINLOCK
+       depends on DEBUG_KERNEL && SMP && !DEBUG_SPINLOCK
        default y
        help
          Add checks with low performance impact to the spinlock functions
index 7ee49f5..d389359 100644 (file)
@@ -117,7 +117,7 @@ char *strchr(const char *s, int c)
        return NULL;
 }
 
-int puts(const char *s)
+static int puts(const char *s)
 {
        const char *nuline = s;
 
@@ -172,7 +172,7 @@ static int print_num(unsigned long num, int base)
        return 0;
 }
 
-int printf(const char *fmt, ...)
+static int printf(const char *fmt, ...)
 {
        va_list args;
        int i = 0;
@@ -204,13 +204,13 @@ void abort(void)
 }
 
 #undef malloc
-void *malloc(size_t size)
+static void *malloc(size_t size)
 {
        return malloc_gzip(size);
 }
 
 #undef free
-void free(void *ptr)
+static void free(void *ptr)
 {
        return free_gzip(ptr);
 }
@@ -278,7 +278,7 @@ static void parse_elf(void *output)
        free(phdrs);
 }
 
-unsigned long decompress_kernel(unsigned int started_wide,
+asmlinkage unsigned long __visible decompress_kernel(unsigned int started_wide,
                unsigned int command_line,
                const unsigned int rd_start,
                const unsigned int rd_end)
index 9e8c101..582fb5d 100644 (file)
@@ -14,6 +14,8 @@
 #define dma_outb       outb
 #define dma_inb                inb
 
+extern unsigned long pcxl_dma_start;
+
 /*
 ** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
 ** (or rather not merge) DMAs into manageable chunks.
index a7cf0d0..f1cc1ee 100644 (file)
@@ -12,6 +12,10 @@ extern void mcount(void);
 extern unsigned long sys_call_table[];
 
 extern unsigned long return_address(unsigned int);
+struct ftrace_regs;
+extern void ftrace_function_trampoline(unsigned long parent,
+               unsigned long self_addr, unsigned long org_sp_gr3,
+               struct ftrace_regs *fregs);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern void ftrace_caller(void);
index edfcb98..0b326e5 100644 (file)
@@ -7,8 +7,6 @@
 #include <asm/processor.h>
 #include <asm/spinlock_types.h>
 
-#define SPINLOCK_BREAK_INSN    0x0000c006      /* break 6,6 */
-
 static inline void arch_spin_val_check(int lock_val)
 {
        if (IS_ENABLED(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK))
index d659340..efd06a8 100644 (file)
@@ -4,6 +4,10 @@
 
 #define __ARCH_SPIN_LOCK_UNLOCKED_VAL  0x1a46
 
+#define SPINLOCK_BREAK_INSN    0x0000c006      /* break 6,6 */
+
+#ifndef __ASSEMBLY__
+
 typedef struct {
 #ifdef CONFIG_PA20
        volatile unsigned int slock;
@@ -27,6 +31,8 @@ typedef struct {
        volatile unsigned int   counter;
 } arch_rwlock_t;
 
+#endif /* __ASSEMBLY__ */
+
 #define __ARCH_RW_LOCK_UNLOCKED__       0x01000000
 #define __ARCH_RW_LOCK_UNLOCKED         { .lock_mutex = __ARCH_SPIN_LOCK_UNLOCKED, \
                                        .counter = __ARCH_RW_LOCK_UNLOCKED__ }
index 0e5ebfe..ae03b86 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/traps.h>
 #include <asm/thread_info.h>
 #include <asm/alternative.h>
+#include <asm/spinlock_types.h>
 
 #include <linux/linkage.h>
 #include <linux/pgtable.h>
        LDREG           0(\ptp),\pte
        bb,<,n          \pte,_PAGE_PRESENT_BIT,3f
        b               \fault
-       stw             \spc,0(\tmp)
+       stw             \tmp1,0(\tmp)
 99:    ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
 2:     LDREG           0(\ptp),\pte
        .endm
 
        /* Release page_table_lock without reloading lock address.
-          Note that the values in the register spc are limited to
-          NR_SPACE_IDS (262144). Thus, the stw instruction always
-          stores a nonzero value even when register spc is 64 bits.
           We use an ordered store to ensure all prior accesses are
           performed prior to releasing the lock. */
-       .macro          ptl_unlock0     spc,tmp
+       .macro          ptl_unlock0     spc,tmp,tmp2
 #ifdef CONFIG_TLB_PTLOCK
-98:    or,COND(=)      %r0,\spc,%r0
-       stw,ma          \spc,0(\tmp)
+98:    ldi             __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmp2
+       or,COND(=)      %r0,\spc,%r0
+       stw,ma          \tmp2,0(\tmp)
 99:    ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
        .endm
 
        /* Release page_table_lock. */
-       .macro          ptl_unlock1     spc,tmp
+       .macro          ptl_unlock1     spc,tmp,tmp2
 #ifdef CONFIG_TLB_PTLOCK
 98:    get_ptl         \tmp
-       ptl_unlock0     \spc,\tmp
+       ptl_unlock0     \spc,\tmp,\tmp2
 99:    ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
        .endm
@@ -1125,7 +1124,7 @@ dtlb_miss_20w:
        
        idtlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1151,7 +1150,7 @@ nadtlb_miss_20w:
 
        idtlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1185,7 +1184,7 @@ dtlb_miss_11:
 
        mtsp            t1, %sr1        /* Restore sr1 */
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1218,7 +1217,7 @@ nadtlb_miss_11:
 
        mtsp            t1, %sr1        /* Restore sr1 */
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1247,7 +1246,7 @@ dtlb_miss_20:
 
        idtlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1275,7 +1274,7 @@ nadtlb_miss_20:
        
        idtlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1320,7 +1319,7 @@ itlb_miss_20w:
        
        iitlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1344,7 +1343,7 @@ naitlb_miss_20w:
 
        iitlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1378,7 +1377,7 @@ itlb_miss_11:
 
        mtsp            t1, %sr1        /* Restore sr1 */
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1402,7 +1401,7 @@ naitlb_miss_11:
 
        mtsp            t1, %sr1        /* Restore sr1 */
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1432,7 +1431,7 @@ itlb_miss_20:
 
        iitlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1452,7 +1451,7 @@ naitlb_miss_20:
 
        iitlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1482,7 +1481,7 @@ dbit_trap_20w:
                
        idtlbt          pte,prot
 
-       ptl_unlock0     spc,t0
+       ptl_unlock0     spc,t0,t1
        rfir
        nop
 #else
@@ -1508,7 +1507,7 @@ dbit_trap_11:
 
        mtsp            t1, %sr1     /* Restore sr1 */
 
-       ptl_unlock0     spc,t0
+       ptl_unlock0     spc,t0,t1
        rfir
        nop
 
@@ -1528,7 +1527,7 @@ dbit_trap_20:
        
        idtlbt          pte,prot
 
-       ptl_unlock0     spc,t0
+       ptl_unlock0     spc,t0,t1
        rfir
        nop
 #endif
index 6d1c781..8f37e75 100644 (file)
@@ -74,8 +74,8 @@
 static DEFINE_SPINLOCK(pdc_lock);
 #endif
 
-unsigned long pdc_result[NUM_PDC_RESULT]  __aligned(8);
-unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8);
+static unsigned long pdc_result[NUM_PDC_RESULT]  __aligned(8);
+static unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8);
 
 #ifdef CONFIG_64BIT
 #define WIDE_FIRMWARE 0x1
@@ -334,7 +334,7 @@ int __pdc_cpu_rendezvous(void)
 /**
  * pdc_cpu_rendezvous_lock - Lock PDC while transitioning to rendezvous state
  */
-void pdc_cpu_rendezvous_lock(void)
+void pdc_cpu_rendezvous_lock(void) __acquires(&pdc_lock)
 {
        spin_lock(&pdc_lock);
 }
@@ -342,7 +342,7 @@ void pdc_cpu_rendezvous_lock(void)
 /**
  * pdc_cpu_rendezvous_unlock - Unlock PDC after reaching rendezvous state
  */
-void pdc_cpu_rendezvous_unlock(void)
+void pdc_cpu_rendezvous_unlock(void) __releases(&pdc_lock)
 {
        spin_unlock(&pdc_lock);
 }
index 4d392e4..d1defb9 100644 (file)
@@ -53,7 +53,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
 
 static ftrace_func_t ftrace_func;
 
-void notrace __hot ftrace_function_trampoline(unsigned long parent,
+asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent,
                                unsigned long self_addr,
                                unsigned long org_sp_gr3,
                                struct ftrace_regs *fregs)
index 00297e8..6f0c92e 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/syscalls.h>
+#include <linux/libgcc.h>
 
 #include <linux/string.h>
 EXPORT_SYMBOL(memset);
@@ -92,12 +93,6 @@ EXPORT_SYMBOL($$divI_12);
 EXPORT_SYMBOL($$divI_14);
 EXPORT_SYMBOL($$divI_15);
 
-extern void __ashrdi3(void);
-extern void __ashldi3(void);
-extern void __lshrdi3(void);
-extern void __muldi3(void);
-extern void __ucmpdi2(void);
-
 EXPORT_SYMBOL(__ashrdi3);
 EXPORT_SYMBOL(__ashldi3);
 EXPORT_SYMBOL(__lshrdi3);
index 3f6b507..bf9f192 100644 (file)
@@ -39,7 +39,7 @@ static struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
 static unsigned long pcxl_used_bytes __read_mostly;
 static unsigned long pcxl_used_pages __read_mostly;
 
-extern unsigned long pcxl_dma_start; /* Start of pcxl dma mapping area */
+unsigned long pcxl_dma_start __ro_after_init; /* pcxl dma mapping area start */
 static DEFINE_SPINLOCK(pcxl_res_lock);
 static char    *pcxl_res_map;
 static int     pcxl_res_hint;
@@ -381,7 +381,7 @@ pcxl_dma_init(void)
        pcxl_res_map = (char *)__get_free_pages(GFP_KERNEL,
                                            get_order(pcxl_res_size));
        memset(pcxl_res_map, 0, pcxl_res_size);
-       proc_gsc_root = proc_mkdir("gsc", NULL);
+       proc_gsc_root = proc_mkdir("bus/gsc", NULL);
        if (!proc_gsc_root)
                printk(KERN_WARNING
                        "pcxl_dma_init: Unable to create gsc /proc dir entry\n");
index 0d24735..0f9b3b5 100644 (file)
@@ -354,10 +354,8 @@ static int __init pdt_initcall(void)
                return -ENODEV;
 
        kpdtd_task = kthread_run(pdt_mainloop, NULL, "kpdtd");
-       if (IS_ERR(kpdtd_task))
-               return PTR_ERR(kpdtd_task);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(kpdtd_task);
 }
 
 late_initcall(pdt_initcall);
index 90b04d8..b0f0816 100644 (file)
@@ -57,7 +57,7 @@ struct rdr_tbl_ent {
 static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
 static int perf_enabled __read_mostly;
 static DEFINE_SPINLOCK(perf_lock);
-struct parisc_device *cpu_device __read_mostly;
+static struct parisc_device *cpu_device __read_mostly;
 
 /* RDRs to write for PCX-W */
 static const int perf_rdrs_W[] =
index 00b0df9..762289b 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/pdc.h>
+#include <asm/smp.h>
 #include <asm/pdcpat.h>
 #include <asm/irq.h>           /* for struct irq_region */
 #include <asm/parisc-device.h>
index 573f830..211a4af 100644 (file)
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
-/* Intended for ccio/sba/cpu statistics under /proc/bus/{runway|gsc} */
-struct proc_dir_entry * proc_runway_root __read_mostly = NULL;
-struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
-struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL;
-
 static void __init setup_cmdline(char **cmdline_p)
 {
        extern unsigned int boot_args[];
@@ -196,48 +191,6 @@ const struct seq_operations cpuinfo_op = {
        .show   = show_cpuinfo
 };
 
-static void __init parisc_proc_mkdir(void)
-{
-       /*
-       ** Can't call proc_mkdir() until after proc_root_init() has been
-       ** called by start_kernel(). In other words, this code can't
-       ** live in arch/.../setup.c because start_parisc() calls
-       ** start_kernel().
-       */
-       switch (boot_cpu_data.cpu_type) {
-       case pcxl:
-       case pcxl2:
-               if (NULL == proc_gsc_root)
-               {
-                       proc_gsc_root = proc_mkdir("bus/gsc", NULL);
-               }
-               break;
-        case pcxt_:
-        case pcxu:
-        case pcxu_:
-        case pcxw:
-        case pcxw_:
-        case pcxw2:
-                if (NULL == proc_runway_root)
-                {
-                        proc_runway_root = proc_mkdir("bus/runway", NULL);
-                }
-                break;
-       case mako:
-       case mako2:
-                if (NULL == proc_mckinley_root)
-                {
-                        proc_mckinley_root = proc_mkdir("bus/mckinley", NULL);
-                }
-                break;
-       default:
-               /* FIXME: this was added to prevent the compiler 
-                * complaining about missing pcx, pcxs and pcxt
-                * I'm assuming they have neither gsc nor runway */
-               break;
-       }
-}
-
 static struct resource central_bus = {
        .name   = "Central Bus",
        .start  = F_EXTEND(0xfff80000),
@@ -294,7 +247,6 @@ static int __init parisc_init(void)
 {
        u32 osid = (OS_ID_LINUX << 16);
 
-       parisc_proc_mkdir();
        parisc_init_resources();
        do_device_inventory();                  /* probe for hardware */
 
index f886ff0..e8d27de 100644 (file)
@@ -423,7 +423,7 @@ static void check_syscallno_in_delay_branch(struct pt_regs *regs)
        regs->gr[31] -= 8; /* delayed branching */
 
        /* Get assembler opcode of code in delay branch */
-       uaddr = (unsigned int *) ((regs->gr[31] & ~3) + 4);
+       uaddr = (u32 __user *) ((regs->gr[31] & ~3) + 4);
        err = get_user(opcode, uaddr);
        if (err)
                return;
index ca2d537..9915062 100644 (file)
 #include <linux/elf-randomize.h>
 
 /*
- * Construct an artificial page offset for the mapping based on the virtual
+ * Construct an artificial page offset for the mapping based on the physical
  * address of the kernel file mapping variable.
- * If filp is zero the calculated pgoff value aliases the memory of the given
- * address. This is useful for io_uring where the mapping shall alias a kernel
- * address and a userspace adress where both the kernel and the userspace
- * access the same memory region.
  */
-#define GET_FILP_PGOFF(filp, addr)             \
-       ((filp ? (((unsigned long) filp->f_mapping) >> 8)       \
-                & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL)        \
-         + (addr >> PAGE_SHIFT))
+#define GET_FILP_PGOFF(filp)           \
+       (filp ? (((unsigned long) filp->f_mapping) >> 8)        \
+                & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL)
 
 static unsigned long shared_align_offset(unsigned long filp_pgoff,
                                         unsigned long pgoff)
@@ -117,7 +112,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
        do_color_align = 0;
        if (filp || (flags & MAP_SHARED))
                do_color_align = 1;
-       filp_pgoff = GET_FILP_PGOFF(filp, addr);
+       filp_pgoff = GET_FILP_PGOFF(filp);
 
        if (flags & MAP_FIXED) {
                /* Even MAP_FIXED mappings must reside within TASK_SIZE */
index 1373e51..1f51aa9 100644 (file)
@@ -39,6 +39,7 @@ registers).
 #include <asm/assembly.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
+#include <asm/spinlock_types.h>
 
 #include <linux/linkage.h>
 
@@ -66,6 +67,16 @@ registers).
        stw     \reg1, 0(%sr2,\reg2)
        .endm
 
+       /* raise exception if spinlock content is not zero or
+        * __ARCH_SPIN_LOCK_UNLOCKED_VAL */
+       .macro  spinlock_check spin_val,tmpreg
+#ifdef CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK
+       ldi     __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmpreg
+       andcm,= \spin_val, \tmpreg, %r0
+       .word   SPINLOCK_BREAK_INSN
+#endif
+       .endm
+
        .text
 
        .import syscall_exit,code
@@ -508,7 +519,8 @@ lws_start:
 
 lws_exit_noerror:
        lws_pagefault_enable    %r1,%r21
-       stw,ma  %r20, 0(%sr2,%r20)
+       ldi     __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21
+       stw,ma  %r21, 0(%sr2,%r20)
        ssm     PSW_SM_I, %r0
        b       lws_exit
        copy    %r0, %r21
@@ -521,7 +533,8 @@ lws_wouldblock:
 
 lws_pagefault:
        lws_pagefault_enable    %r1,%r21
-       stw,ma  %r20, 0(%sr2,%r20)
+       ldi     __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21
+       stw,ma  %r21, 0(%sr2,%r20)
        ssm     PSW_SM_I, %r0
        ldo     3(%r0),%r28
        b       lws_exit
@@ -619,6 +632,7 @@ lws_compare_and_swap:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -772,6 +786,7 @@ cas2_lock_start:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -1001,6 +1016,7 @@ atomic_xchg_start:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -1199,6 +1215,7 @@ atomic_store_start:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -1330,7 +1347,7 @@ ENTRY(lws_lock_start)
        /* lws locks */
        .rept 256
        /* Keep locks aligned at 16-bytes */
-       .word 1
+       .word __ARCH_SPIN_LOCK_UNLOCKED_VAL
        .word 0 
        .word 0
        .word 0
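The spinlock_check macro added to the LWS fast paths turns a corrupted lock word into an immediate break trap: after LDCW the value read back must be either 0 (lock just taken) or __ARCH_SPIN_LOCK_UNLOCKED_VAL, and the andcm traps on any bit outside that constant. Roughly the same check expressed in C, for illustration only:

        /* C equivalent of the spinlock_check assembler macro (sketch only). */
        static inline void lws_spinlock_check(unsigned int lock_val)
        {
                if (IS_ENABLED(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK) &&
                    (lock_val & ~__ARCH_SPIN_LOCK_UNLOCKED_VAL))
                        BUG();  /* stands in for the "break 6,6" trap */
        }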
index 8130627..170d0dd 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/signal.h>
 #include <linux/ratelimit.h>
 #include <linux/uaccess.h>
+#include <linux/sysctl.h>
 #include <asm/unaligned.h>
 #include <asm/hardirq.h>
 #include <asm/traps.h>
index 8e6014a..9d8b4db 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/module.h>
+#include <linux/libgcc.h>
 
 union ull_union {
        unsigned long long ull;
@@ -9,7 +10,7 @@ union ull_union {
        } ui;
 };
 
-int __ucmpdi2(unsigned long long a, unsigned long long b)
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
 {
        union ull_union au = {.ull = a};
        union ull_union bu = {.ull = b};
index a4c7c76..2fe5b44 100644 (file)
@@ -192,31 +192,31 @@ int fixup_exception(struct pt_regs *regs)
  * For implementation see handle_interruption() in traps.c
  */
 static const char * const trap_description[] = {
-       [1] "High-priority machine check (HPMC)",
-       [2] "Power failure interrupt",
-       [3] "Recovery counter trap",
-       [5] "Low-priority machine check",
-       [6] "Instruction TLB miss fault",
-       [7] "Instruction access rights / protection trap",
-       [8] "Illegal instruction trap",
-       [9] "Break instruction trap",
-       [10] "Privileged operation trap",
-       [11] "Privileged register trap",
-       [12] "Overflow trap",
-       [13] "Conditional trap",
-       [14] "FP Assist Exception trap",
-       [15] "Data TLB miss fault",
-       [16] "Non-access ITLB miss fault",
-       [17] "Non-access DTLB miss fault",
-       [18] "Data memory protection/unaligned access trap",
-       [19] "Data memory break trap",
-       [20] "TLB dirty bit trap",
-       [21] "Page reference trap",
-       [22] "Assist emulation trap",
-       [25] "Taken branch trap",
-       [26] "Data memory access rights trap",
-       [27] "Data memory protection ID trap",
-       [28] "Unaligned data reference trap",
+       [1] =   "High-priority machine check (HPMC)",
+       [2] =   "Power failure interrupt",
+       [3] =   "Recovery counter trap",
+       [5] =   "Low-priority machine check",
+       [6] =   "Instruction TLB miss fault",
+       [7] =   "Instruction access rights / protection trap",
+       [8] =   "Illegal instruction trap",
+       [9] =   "Break instruction trap",
+       [10] =  "Privileged operation trap",
+       [11] =  "Privileged register trap",
+       [12] =  "Overflow trap",
+       [13] =  "Conditional trap",
+       [14] =  "FP Assist Exception trap",
+       [15] =  "Data TLB miss fault",
+       [16] =  "Non-access ITLB miss fault",
+       [17] =  "Non-access DTLB miss fault",
+       [18] =  "Data memory protection/unaligned access trap",
+       [19] =  "Data memory break trap",
+       [20] =  "TLB dirty bit trap",
+       [21] =  "Page reference trap",
+       [22] =  "Assist emulation trap",
+       [25] =  "Taken branch trap",
+       [26] =  "Data memory access rights trap",
+       [27] =  "Data memory protection ID trap",
+       [28] =  "Unaligned data reference trap",
 };
 
 const char *trap_name(unsigned long code)
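
The conversion above replaces the obsolete GNU "[index] value" initializer form with standard C99 designated initializers ("[index] = value"); recent compilers no longer accept the old extension. A standalone illustration of the resulting sparse-array behaviour:

    /* Standalone example of C99 designated array initializers, the form
     * the trap_description[] table is converted to above.
     */
    #include <stdio.h>

    static const char *const names[8] = {
            [1] = "one",
            [3] = "three",   /* gaps (indices 0, 2, 4..7) stay NULL */
            [5] = "five",
    };

    int main(void)
    {
            for (int i = 0; i < 8; i++)
                    printf("%d -> %s\n", i, names[i] ? names[i] : "(unknown)");
            return 0;
    }
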
index 389941c..a088c24 100644 (file)
@@ -523,10 +523,6 @@ void mark_rodata_ro(void)
 void *parisc_vmalloc_start __ro_after_init;
 EXPORT_SYMBOL(parisc_vmalloc_start);
 
-#ifdef CONFIG_PA11
-unsigned long pcxl_dma_start __ro_after_init;
-#endif
-
 void __init mem_init(void)
 {
        /* Do sanity checks on IPC (compat) structures */
index 345ff0b..d7ee1f4 100644 (file)
@@ -27,7 +27,7 @@
  */
 void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
 {
-       void __iomem *addr;
+       uintptr_t addr;
        struct vm_struct *area;
        unsigned long offset, last_addr;
        pgprot_t pgprot;
@@ -79,10 +79,9 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
        if (!area)
                return NULL;
 
-       addr = (void __iomem *) area->addr;
-       if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
-                              phys_addr, pgprot)) {
-               vunmap(addr);
+       addr = (uintptr_t) area->addr;
+       if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
+               vunmap(area->addr);
                return NULL;
        }
 
index 8a6754f..a6c7069 100644 (file)
@@ -393,7 +393,6 @@ int validate_sp_size(unsigned long sp, struct task_struct *p,
  */
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 static inline void prefetch(const void *x)
 {
@@ -411,8 +410,6 @@ static inline void prefetchw(const void *x)
        __asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x));
 }
 
-#define spin_lock_prefetch(x)  prefetchw(x)
-
 /* asm stubs */
 extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
 extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
index 4caf5e3..359577e 100644 (file)
@@ -709,9 +709,9 @@ static int __init rtas_flash_init(void)
        if (!rtas_validate_flash_data.buf)
                return -ENOMEM;
 
-       flash_block_cache = kmem_cache_create("rtas_flash_cache",
-                                             RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0,
-                                             NULL);
+       flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache",
+                                                      RTAS_BLK_SIZE, RTAS_BLK_SIZE,
+                                                      0, 0, RTAS_BLK_SIZE, NULL);
        if (!flash_block_cache) {
                printk(KERN_ERR "%s: failed to create block cache\n",
                                __func__);
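
kmem_cache_create_usercopy() differs from kmem_cache_create() by the extra useroffset/usersize pair, which whitelists the region of each object that may be copied to or from user space under CONFIG_HARDENED_USERCOPY; here the RTAS flash blocks are whitelisted in full. A hedged kernel-style sketch of the pattern (BLK_SIZE and the cache name are placeholders, not kernel symbols):

    /* Sketch, not a complete module: create a slab cache whose objects
     * may be copied to/from user space in their entirety.
     */
    #include <linux/slab.h>
    #include <linux/errno.h>
    #include <linux/init.h>

    #define BLK_SIZE 4096

    static struct kmem_cache *blk_cache;

    static int __init blk_cache_init(void)
    {
            blk_cache = kmem_cache_create_usercopy("example_blk_cache",
                                                   BLK_SIZE, BLK_SIZE, 0,
                                                   0, BLK_SIZE, /* useroffset, usersize */
                                                   NULL);       /* no constructor */
            return blk_cache ? 0 : -ENOMEM;
    }
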
index 0dc8555..ec98e52 100644 (file)
@@ -145,6 +145,7 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops subpage_walk_ops = {
        .pmd_entry      = subpage_walk_pmd_entry,
+       .walk_lock      = PGWALK_WRLOCK_VERIFY,
 };
 
 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
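
The new .walk_lock field makes each mm_walk_ops declare which form of the mmap lock it expects (PGWALK_RDLOCK, PGWALK_WRLOCK, or PGWALK_WRLOCK_VERIFY), so the generic page-walk code can assert or take the right lock. A hedged sketch of a read-only walker declaring its expectation:

    /* Sketch only: a trivial read-side page-table walker. The callback
     * signature and PGWALK_* constants come from <linux/pagewalk.h>.
     */
    #include <linux/pagewalk.h>

    static int count_present_pte(pte_t *pte, unsigned long addr,
                                 unsigned long next, struct mm_walk *walk)
    {
            unsigned long *count = walk->private;

            if (pte_present(ptep_get(pte)))
                    (*count)++;
            return 0;
    }

    static const struct mm_walk_ops count_present_ops = {
            .pte_entry = count_present_pte,
            .walk_lock = PGWALK_RDLOCK, /* caller holds mmap_read_lock() */
    };
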
index 4c07b91..bea7b73 100644 (file)
@@ -570,24 +570,30 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE
 config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
        def_bool y
        # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
-       depends on AS_IS_GNU && AS_VERSION >= 23800
-       help
-         Newer binutils versions default to ISA spec version 20191213 which
-         moves some instructions from the I extension to the Zicsr and Zifencei
-         extensions.
+       # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=98416dbb0a62579d4a7a4a76bab51b5b52fec2cd
+       depends on AS_IS_GNU && AS_VERSION >= 23600
+       help
+         Binutils-2.38 and GCC-12.1.0 bumped the default ISA spec to the newer
+         20191213 version, which moves some instructions from the I extension to
+         the Zicsr and Zifencei extensions. This requires explicitly specifying
+         Zicsr and Zifencei when binutils >= 2.38 or GCC >= 12.1.0. Zicsr
+         and Zifencei are supported in binutils from version 2.36 onwards.
+         To make life easier, and avoid forcing toolchains that default to a
+         newer ISA spec to version 2.2, relax the check to binutils >= 2.36.
+         For clang < 17 or GCC < 11.3.0, for which this is not possible or need
+         special treatment, this is dealt with in TOOLCHAIN_NEEDS_OLD_ISA_SPEC.
 
 config TOOLCHAIN_NEEDS_OLD_ISA_SPEC
        def_bool y
        depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
        # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16
-       depends on CC_IS_CLANG && CLANG_VERSION < 170000
-       help
-         Certain versions of clang do not support zicsr and zifencei via -march
-         but newer versions of binutils require it for the reasons noted in the
-         help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This
-         option causes an older ISA spec compatible with these older versions
-         of clang to be passed to GAS, which has the same result as passing zicsr
-         and zifencei to -march.
+       # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671
+       depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110300)
+       help
+         Certain versions of clang and GCC do not support zicsr and zifencei via
+         -march. This option causes an older ISA spec compatible with these older
+         versions of clang and GCC to be passed to GAS, which has the same result
+         as passing zicsr and zifencei to -march.
 
 config FPU
        bool "FPU support"
index 8091b8b..b93ffdd 100644 (file)
@@ -37,6 +37,10 @@ static inline void flush_dcache_page(struct page *page)
 #define flush_icache_user_page(vma, pg, addr, len) \
        flush_icache_mm(vma->vm_mm, 0)
 
+#ifdef CONFIG_64BIT
+#define flush_cache_vmap(start, end)   flush_tlb_kernel_range(start, end)
+#endif
+
 #ifndef CONFIG_SMP
 
 #define flush_icache_all() local_flush_icache_all()
index 4e1505c..fce0040 100644 (file)
 #define RVC_INSN_FUNCT4_OPOFF  12
 #define RVC_INSN_FUNCT3_MASK   GENMASK(15, 13)
 #define RVC_INSN_FUNCT3_OPOFF  13
+#define RVC_INSN_J_RS1_MASK    GENMASK(11, 7)
 #define RVC_INSN_J_RS2_MASK    GENMASK(6, 2)
 #define RVC_INSN_OPCODE_MASK   GENMASK(1, 0)
 #define RVC_ENCODE_FUNCT3(f_)  (RVC_FUNCT3_##f_ << RVC_INSN_FUNCT3_OPOFF)
@@ -245,8 +246,6 @@ __RISCV_INSN_FUNCS(c_jal, RVC_MASK_C_JAL, RVC_MATCH_C_JAL)
 __RISCV_INSN_FUNCS(auipc, RVG_MASK_AUIPC, RVG_MATCH_AUIPC)
 __RISCV_INSN_FUNCS(jalr, RVG_MASK_JALR, RVG_MATCH_JALR)
 __RISCV_INSN_FUNCS(jal, RVG_MASK_JAL, RVG_MATCH_JAL)
-__RISCV_INSN_FUNCS(c_jr, RVC_MASK_C_JR, RVC_MATCH_C_JR)
-__RISCV_INSN_FUNCS(c_jalr, RVC_MASK_C_JALR, RVC_MATCH_C_JALR)
 __RISCV_INSN_FUNCS(c_j, RVC_MASK_C_J, RVC_MATCH_C_J)
 __RISCV_INSN_FUNCS(beq, RVG_MASK_BEQ, RVG_MATCH_BEQ)
 __RISCV_INSN_FUNCS(bne, RVG_MASK_BNE, RVG_MATCH_BNE)
@@ -273,6 +272,18 @@ static __always_inline bool riscv_insn_is_branch(u32 code)
        return (code & RV_INSN_OPCODE_MASK) == RVG_OPCODE_BRANCH;
 }
 
+static __always_inline bool riscv_insn_is_c_jr(u32 code)
+{
+       return (code & RVC_MASK_C_JR) == RVC_MATCH_C_JR &&
+              (code & RVC_INSN_J_RS1_MASK) != 0;
+}
+
+static __always_inline bool riscv_insn_is_c_jalr(u32 code)
+{
+       return (code & RVC_MASK_C_JALR) == RVC_MATCH_C_JALR &&
+              (code & RVC_INSN_J_RS1_MASK) != 0;
+}
+
 #define RV_IMM_SIGN(x) (-(((x) >> 31) & 1))
 #define RVC_IMM_SIGN(x) (-(((x) >> 12) & 1))
 #define RV_X(X, s, mask)  (((X) >> (s)) & (mask))
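
c.jr and c.jalr are only defined when the rs1 field is non-zero (rs1 == 0 selects the reserved and c.ebreak encodings in the same opcode space), which is why the table-driven helpers are replaced by functions that also test RVC_INSN_J_RS1_MASK. A standalone sketch of the decode, with the mask/match constants written out numerically for illustration:

    /* Standalone sketch of the rs1 != 0 check that distinguishes a real
     * c.jalr from the c.ebreak encoding.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define RVC_INSN_J_RS1_MASK 0x0f80u   /* bits 11:7 */
    #define RVC_MASK_C_JALR     0xf07fu
    #define RVC_MATCH_C_JALR    0x9002u

    static int insn_is_c_jalr(uint32_t code)
    {
            return (code & RVC_MASK_C_JALR) == RVC_MATCH_C_JALR &&
                   (code & RVC_INSN_J_RS1_MASK) != 0;
    }

    int main(void)
    {
            printf("%d\n", insn_is_c_jalr(0x9082)); /* c.jalr ra -> 1 */
            printf("%d\n", insn_is_c_jalr(0x9002)); /* c.ebreak  -> 0 */
            return 0;
    }
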
index aff6c33..4c58ee7 100644 (file)
@@ -101,9 +101,9 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
  * Relaxed I/O memory access primitives. These follow the Device memory
  * ordering rules but do not guarantee any ordering relative to Normal memory
  * accesses.  These are defined to order the indicated access (either a read or
- * write) with all other I/O memory accesses. Since the platform specification
- * defines that all I/O regions are strongly ordered on channel 2, no explicit
- * fences are required to enforce this ordering.
+ * write) with all other I/O memory accesses to the same peripheral. Since the
+ * platform specification defines that all I/O regions are strongly ordered on
+ * channel 0, no explicit fences are required to enforce this ordering.
  */
 /* FIXME: These are now the same as asm-generic */
 #define __io_rbr()             do {} while (0)
@@ -125,14 +125,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 #endif
 
 /*
- * I/O memory access primitives. Reads are ordered relative to any
- * following Normal memory access. Writes are ordered relative to any prior
- * Normal memory access.  The memory barriers here are necessary as RISC-V
+ * I/O memory access primitives.  Reads are ordered relative to any following
+ * Normal memory read and delay() loop.  Writes are ordered relative to any
+ * prior Normal memory write.  The memory barriers here are necessary as RISC-V
  * doesn't define any ordering between the memory space and the I/O space.
  */
 #define __io_br()      do {} while (0)
-#define __io_ar(v)     __asm__ __volatile__ ("fence i,r" : : : "memory")
-#define __io_bw()      __asm__ __volatile__ ("fence w,o" : : : "memory")
+#define __io_ar(v)     ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); })
+#define __io_bw()      ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); })
 #define __io_aw()      mmiowb_set_pending()
 
 #define readb(c)       ({ u8  __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
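
The accessors above bracket every MMIO access with __io_br()/__io_ar() and __io_bw()/__io_aw(); strengthening __io_ar() to "fence i,ir" orders an MMIO read before both later device input and later normal reads, which is what a polling loop with a delay between reads relies on. A hedged driver-style sketch of such a loop (register offset and status bit are made up):

    /* Sketch: why __io_ar() must order an MMIO read before a following
     * delay loop. Device register layout is illustrative only.
     */
    #include <linux/io.h>
    #include <linux/delay.h>
    #include <linux/errno.h>

    #define DEV_STATUS   0x04
    #define STATUS_READY 0x1

    static int wait_until_ready(void __iomem *base)
    {
            int tries = 100;

            while (tries--) {
                    /* readl() issues __io_ar() after the access, so the
                     * load completes before the delay below starts.
                     */
                    if (readl(base + DEV_STATUS) & STATUS_READY)
                            return 0;
                    udelay(10);
            }
            return -ETIMEDOUT;
    }
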
index 75970ee..b5680c9 100644 (file)
@@ -188,6 +188,8 @@ extern struct pt_alloc_ops pt_ops __initdata;
 #define PAGE_KERNEL_IO         __pgprot(_PAGE_IOREMAP)
 
 extern pgd_t swapper_pg_dir[];
+extern pgd_t trampoline_pg_dir[];
+extern pgd_t early_pg_dir[];
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int pmd_present(pmd_t pmd)
index 3d78930..c5ee07b 100644 (file)
@@ -70,8 +70,9 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
                "csrr   %1, " __stringify(CSR_VTYPE) "\n\t"
                "csrr   %2, " __stringify(CSR_VL) "\n\t"
                "csrr   %3, " __stringify(CSR_VCSR) "\n\t"
+               "csrr   %4, " __stringify(CSR_VLENB) "\n\t"
                : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
-                 "=r" (dest->vcsr) : :);
+                 "=r" (dest->vcsr), "=r" (dest->vlenb) : :);
 }
 
 static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
index 58d3e44..924d01b 100644 (file)
@@ -3,12 +3,14 @@
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 
+extern bool pgtable_l4_enabled, pgtable_l5_enabled;
+
 #define IOREMAP_MAX_ORDER (PUD_SHIFT)
 
 #define arch_vmap_pud_supported arch_vmap_pud_supported
 static inline bool arch_vmap_pud_supported(pgprot_t prot)
 {
-       return true;
+       return pgtable_l4_enabled || pgtable_l5_enabled;
 }
 
 #define arch_vmap_pmd_supported arch_vmap_pmd_supported
diff --git a/arch/riscv/include/uapi/asm/bitsperlong.h b/arch/riscv/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..7d0b32e
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Regents of the University of California
+ */
+
+#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H
+#define _UAPI_ASM_RISCV_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */
index e17c550..2838001 100644 (file)
@@ -97,6 +97,7 @@ struct __riscv_v_ext_state {
        unsigned long vl;
        unsigned long vtype;
        unsigned long vcsr;
+       unsigned long vlenb;
        void *datap;
        /*
         * In signal handler, datap will be set a correct user stack offset
index 1893457..b86e5e2 100644 (file)
@@ -11,7 +11,13 @@ compat_vdso-syms += flush_icache
 COMPAT_CC := $(CC)
 COMPAT_LD := $(LD)
 
-COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32
+# binutils 2.35 does not support the zifencei extension, but in the ISA
+# spec 20191213, G stands for IMAFD_ZICSR_ZIFENCEI.
+ifdef CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
+       COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32
+else
+       COMPAT_CC_FLAGS := -march=rv32imafd -mabi=ilp32
+endif
 COMPAT_LD_FLAGS := -melf32lriscv
 
 # Disable attributes, as they're useless and break the build.
index a2fc952..35b854c 100644 (file)
 #include <asm/smp.h>
 #include <asm/pgtable.h>
 
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+       return phys_id == cpuid_to_hartid_map(cpu);
+}
+
 /*
  * Returns the hart ID of the given device tree node, or -ENODEV if the node
  * isn't an enabled and valid RISC-V hart node.
index 5372b70..c08bb5c 100644 (file)
@@ -281,7 +281,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
                kbuf.buffer = initrd;
                kbuf.bufsz = kbuf.memsz = initrd_len;
                kbuf.buf_align = PAGE_SIZE;
-               kbuf.top_down = false;
+               kbuf.top_down = true;
                kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
                ret = kexec_add_buffer(&kbuf);
                if (ret)
@@ -425,6 +425,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
                 * sym, instead of searching the whole relsec.
                 */
                case R_RISCV_PCREL_HI20:
+               case R_RISCV_CALL_PLT:
                case R_RISCV_CALL:
                        *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
                                 ENCODE_UJTYPE_IMM(val - addr);
index d0577cc..a8efa05 100644 (file)
@@ -84,6 +84,9 @@ void do_softirq_own_stack(void)
                : [sp] "r" (sp)
                : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
                  "t0", "t1", "t2", "t3", "t4", "t5", "t6",
+#ifndef CONFIG_FRAME_POINTER
+                 "s0",
+#endif
                  "memory");
        } else
 #endif
index 1d572cf..487303e 100644 (file)
@@ -25,9 +25,6 @@ enum riscv_regset {
 #ifdef CONFIG_FPU
        REGSET_F,
 #endif
-#ifdef CONFIG_RISCV_ISA_V
-       REGSET_V,
-#endif
 };
 
 static int riscv_gpr_get(struct task_struct *target,
@@ -84,61 +81,6 @@ static int riscv_fpr_set(struct task_struct *target,
 }
 #endif
 
-#ifdef CONFIG_RISCV_ISA_V
-static int riscv_vr_get(struct task_struct *target,
-                       const struct user_regset *regset,
-                       struct membuf to)
-{
-       struct __riscv_v_ext_state *vstate = &target->thread.vstate;
-
-       if (!riscv_v_vstate_query(task_pt_regs(target)))
-               return -EINVAL;
-
-       /*
-        * Ensure the vector registers have been saved to the memory before
-        * copying them to membuf.
-        */
-       if (target == current)
-               riscv_v_vstate_save(current, task_pt_regs(current));
-
-       /* Copy vector header from vstate. */
-       membuf_write(&to, vstate, offsetof(struct __riscv_v_ext_state, datap));
-       membuf_zero(&to, sizeof(vstate->datap));
-
-       /* Copy all the vector registers from vstate. */
-       return membuf_write(&to, vstate->datap, riscv_v_vsize);
-}
-
-static int riscv_vr_set(struct task_struct *target,
-                       const struct user_regset *regset,
-                       unsigned int pos, unsigned int count,
-                       const void *kbuf, const void __user *ubuf)
-{
-       int ret, size;
-       struct __riscv_v_ext_state *vstate = &target->thread.vstate;
-
-       if (!riscv_v_vstate_query(task_pt_regs(target)))
-               return -EINVAL;
-
-       /* Copy rest of the vstate except datap */
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate, 0,
-                                offsetof(struct __riscv_v_ext_state, datap));
-       if (unlikely(ret))
-               return ret;
-
-       /* Skip copy datap. */
-       size = sizeof(vstate->datap);
-       count -= size;
-       ubuf += size;
-
-       /* Copy all the vector registers. */
-       pos = 0;
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap,
-                                0, riscv_v_vsize);
-       return ret;
-}
-#endif
-
 static const struct user_regset riscv_user_regset[] = {
        [REGSET_X] = {
                .core_note_type = NT_PRSTATUS,
@@ -158,17 +100,6 @@ static const struct user_regset riscv_user_regset[] = {
                .set = riscv_fpr_set,
        },
 #endif
-#ifdef CONFIG_RISCV_ISA_V
-       [REGSET_V] = {
-               .core_note_type = NT_RISCV_VECTOR,
-               .align = 16,
-               .n = ((32 * RISCV_MAX_VLENB) +
-                     sizeof(struct __riscv_v_ext_state)) / sizeof(__u32),
-               .size = sizeof(__u32),
-               .regset_get = riscv_vr_get,
-               .set = riscv_vr_set,
-       },
-#endif
 };
 
 static const struct user_regset_view riscv_user_native_view = {
index 85bbce0..40420af 100644 (file)
@@ -61,11 +61,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid)
        return -ENOENT;
 }
 
-bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
-{
-       return phys_id == cpuid_to_hartid_map(cpu);
-}
-
 static void ipi_stop(void)
 {
        set_cpu_online(smp_processor_id(), false);
index f910dfc..f798c85 100644 (file)
@@ -297,7 +297,7 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
 asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
 {
        if (user_mode(regs)) {
-               ulong syscall = regs->a7;
+               long syscall = regs->a7;
 
                regs->epc += 4;
                regs->orig_a0 = regs->a0;
@@ -306,9 +306,9 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
 
                syscall = syscall_enter_from_user_mode(regs, syscall);
 
-               if (syscall < NR_syscalls)
+               if (syscall >= 0 && syscall < NR_syscalls)
                        syscall_handler(regs, syscall);
-               else
+               else if (syscall != -1)
                        regs->a0 = -ENOSYS;
 
                syscall_exit_to_user_mode(regs);
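
Treating the syscall number as signed matters because ptrace/seccomp can rewrite it to -1 to skip the syscall: with an unsigned type, -1 compares as a huge positive number and the handler would clobber a0 with -ENOSYS. A standalone sketch of the corrected dispatch logic (the NR_syscalls value is illustrative):

    /* Standalone illustration of the signed/unsigned pitfall fixed above. */
    #include <stdio.h>

    #define NR_syscalls 451   /* illustrative count */

    static const char *dispatch(long syscall)
    {
            if (syscall >= 0 && syscall < NR_syscalls)
                    return "handled";
            else if (syscall != -1)
                    return "-ENOSYS";
            return "skipped";   /* tracer asked us to skip; leave a0 alone */
    }

    int main(void)
    {
            printf("%s\n", dispatch(64));    /* ordinary syscall       */
            printf("%s\n", dispatch(-1));    /* skipped by tracer      */
            printf("%s\n", dispatch(9999));  /* bogus number -> ENOSYS */
            return 0;
    }
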
@@ -372,6 +372,9 @@ asmlinkage void noinstr do_irq(struct pt_regs *regs)
                : [sp] "r" (sp), [regs] "r" (regs)
                : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
                  "t0", "t1", "t2", "t3", "t4", "t5", "t6",
+#ifndef CONFIG_FRAME_POINTER
+                 "s0",
+#endif
                  "memory");
        } else
 #endif
index f2eb479..068c745 100644 (file)
@@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
 {
 }
@@ -559,7 +553,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
        int ret;
-       kvm_pfn_t pfn = pte_pfn(range->pte);
+       kvm_pfn_t pfn = pte_pfn(range->arg.pte);
 
        if (!kvm->arch.pgd)
                return false;
index ec486e5..09b47eb 100644 (file)
@@ -17,8 +17,11 @@ ENTRY(__asm_copy_from_user)
        li t6, SR_SUM
        csrs CSR_STATUS, t6
 
-       /* Save for return value */
-       mv      t5, a2
+       /*
+        * Save the terminal address which will be used to compute the number
+        * of bytes copied in case of a fixup exception.
+        */
+       add     t5, a0, a2
 
        /*
         * Register allocation for code below:
@@ -176,7 +179,7 @@ ENTRY(__asm_copy_from_user)
 10:
        /* Disable access to user memory */
        csrc CSR_STATUS, t6
-       mv a0, t5
+       sub a0, t5, a0
        ret
 ENDPROC(__asm_copy_to_user)
 ENDPROC(__asm_copy_from_user)
@@ -228,7 +231,7 @@ ENTRY(__clear_user)
 11:
        /* Disable access to user memory */
        csrc CSR_STATUS, t6
-       mv a0, a1
+       sub a0, a3, a0
        ret
 ENDPROC(__clear_user)
 EXPORT_SYMBOL(__clear_user)
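
The fixup changes make the copy/clear routines report how many bytes were left undone when a fault hits, computed from the terminal address, which is the contract copy_from_user() and clear_user() callers depend on. A hedged kernel-style sketch of a caller that uses that return value:

    /* Sketch: the contract the fixup paths above implement. On a fault,
     * copy_from_user() returns the number of bytes it could NOT copy.
     */
    #include <linux/uaccess.h>
    #include <linux/string.h>
    #include <linux/errno.h>

    static long read_from_user(void *dst, const void __user *src,
                               unsigned long len)
    {
            unsigned long not_copied = copy_from_user(dst, src, len);

            if (not_copied) {
                    /* Faulted part-way: clear the tail we never filled. */
                    memset(dst + (len - not_copied), 0, not_copied);
                    return -EFAULT;
            }
            return len;
    }
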
index 9ce5047..e4c35ac 100644 (file)
 #include <linux/kfence.h>
 
 #include <asm/fixmap.h>
-#include <asm/tlbflush.h>
-#include <asm/sections.h>
-#include <asm/soc.h>
 #include <asm/io.h>
-#include <asm/ptdump.h>
 #include <asm/numa.h>
+#include <asm/pgtable.h>
+#include <asm/ptdump.h>
+#include <asm/sections.h>
+#include <asm/soc.h>
+#include <asm/tlbflush.h>
 
 #include "../kernel/head.h"
 
@@ -214,8 +215,13 @@ static void __init setup_bootmem(void)
        memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
        phys_ram_end = memblock_end_of_DRAM();
+
+       /*
+        * Make sure we align the start of the memory on a PMD boundary so that
+        * at worst, we map the linear mapping with PMD mappings.
+        */
        if (!IS_ENABLED(CONFIG_XIP_KERNEL))
-               phys_ram_base = memblock_start_of_DRAM();
+               phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
 
        /*
         * In 64-bit, any use of __va/__pa before this point is wrong as we
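
Masking the start of DRAM with PMD_MASK rounds it down to a 2 MiB boundary (with 4 KiB base pages), so the worst case for the linear mapping is PMD-sized mappings rather than 4 KiB ones. A standalone illustration of the rounding:

    /* Standalone illustration of the "& PMD_MASK" rounding used above;
     * 2 MiB PMD size assumed, as on riscv64 with 4 KiB base pages.
     */
    #include <stdio.h>

    #define PMD_SHIFT 21
    #define PMD_SIZE  (1UL << PMD_SHIFT)
    #define PMD_MASK  (~(PMD_SIZE - 1))

    int main(void)
    {
            unsigned long start_of_dram = 0x80280000UL; /* not PMD aligned */
            unsigned long phys_ram_base = start_of_dram & PMD_MASK;

            /* prints 0x80280000 -> 0x80200000 */
            printf("0x%lx -> 0x%lx\n", start_of_dram, phys_ram_base);
            return 0;
    }
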
index 8fc0efc..a01bc15 100644 (file)
@@ -22,7 +22,6 @@
  * region is not and then we have to go down to the PUD level.
  */
 
-extern pgd_t early_pg_dir[PTRS_PER_PGD];
 pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss;
 pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss;
index ea3d61d..161d0b3 100644 (file)
@@ -102,6 +102,7 @@ static const struct mm_walk_ops pageattr_ops = {
        .pmd_entry = pageattr_pmd_entry,
        .pte_entry = pageattr_pte_entry,
        .pte_hole = pageattr_pte_hole,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
index 2bbc3d5..427f952 100644 (file)
@@ -817,6 +817,8 @@ struct kvm_s390_cpu_model {
        __u64 *fac_list;
        u64 cpuid;
        unsigned short ibc;
+       /* subset of available UV-features for pv-guests enabled by user space */
+       struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
 };
 
 typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
@@ -1028,6 +1030,9 @@ static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
 
 extern char sie_exit;
 
+bool kvm_s390_pv_is_protected(struct kvm *kvm);
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
+
 extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
 extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
 
index d6bb2f4..0e7bd38 100644 (file)
@@ -99,6 +99,8 @@ enum uv_cmds_inst {
 enum uv_feat_ind {
        BIT_UV_FEAT_MISC = 0,
        BIT_UV_FEAT_AIV = 1,
+       BIT_UV_FEAT_AP = 4,
+       BIT_UV_FEAT_AP_INTR = 5,
 };
 
 struct uv_cb_header {
@@ -159,7 +161,15 @@ struct uv_cb_cgc {
        u64 guest_handle;
        u64 conf_base_stor_origin;
        u64 conf_virt_stor_origin;
-       u64 reserved30;
+       u8  reserved30[6];
+       union {
+               struct {
+                       u16 : 14;
+                       u16 ap_instr_intr : 1;
+                       u16 ap_allow_instr : 1;
+               };
+               u16 raw;
+       } flags;
        u64 guest_stor_origin;
        u64 guest_stor_len;
        u64 guest_sca;
@@ -397,6 +407,13 @@ struct uv_info {
 
 extern struct uv_info uv_info;
 
+static inline bool uv_has_feature(u8 feature_bit)
+{
+       if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8)
+               return false;
+       return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
+}
+
 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 extern int prot_virt_guest;
 
@@ -463,6 +480,7 @@ static inline int is_prot_virt_host(void)
        return prot_virt_host;
 }
 
+int uv_pin_shared(unsigned long paddr);
 int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
 int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
 int uv_destroy_owned_page(unsigned long paddr);
@@ -475,6 +493,11 @@ void setup_uv(void);
 #define is_prot_virt_host() 0
 static inline void setup_uv(void) {}
 
+static inline int uv_pin_shared(unsigned long paddr)
+{
+       return 0;
+}
+
 static inline int uv_destroy_owned_page(unsigned long paddr)
 {
        return 0;
index a73cf01..abe926d 100644 (file)
@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
        __u8 reserved[1728];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST        6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST  7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS        64
+struct kvm_s390_vm_cpu_uv_feat {
+       union {
+               struct {
+                       __u64 : 4;
+                       __u64 ap : 1;           /* bit 4 */
+                       __u64 ap_intr : 1;      /* bit 5 */
+                       __u64 : 58;
+               };
+               __u64 feat;
+       };
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW       0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW       1
index 66f0eb1..fc07bc3 100644 (file)
@@ -88,7 +88,7 @@ fail:
  * Requests the Ultravisor to pin the page in the shared state. This will
  * cause an intercept when the guest attempts to unshare the pinned page.
  */
-static int uv_pin_shared(unsigned long paddr)
+int uv_pin_shared(unsigned long paddr)
 {
        struct uv_cb_cfs uvcb = {
                .header.cmd = UVC_CMD_PIN_PAGE_SHARED,
@@ -100,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
                return -EINVAL;
        return 0;
 }
+EXPORT_SYMBOL_GPL(uv_pin_shared);
 
 /*
  * Requests the Ultravisor to destroy a guest page and make it
@@ -257,7 +258,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
         * shared page from a different protected VM will automatically also
         * transfer its ownership.
         */
-       if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
+       if (uv_has_feature(BIT_UV_FEAT_MISC))
                return false;
        if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
                return false;
index 341abaf..b163520 100644 (file)
@@ -228,6 +228,21 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
 
 #define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
 
+static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
+{
+       if (!guestdbg_enabled(vcpu) || !per_event(vcpu))
+               return false;
+       if (guestdbg_sstep_enabled(vcpu) &&
+           vcpu->arch.sie_block->iprcc != PGM_PER) {
+               /*
+                * __vcpu_run() will exit after delivering the concurrently
+                * indicated condition.
+                */
+               return false;
+       }
+       return true;
+}
+
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
        psw_t psw;
@@ -242,7 +257,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
        if (kvm_s390_pv_cpu_is_protected(vcpu))
                return -EOPNOTSUPP;
 
-       if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+       if (should_handle_per_event(vcpu)) {
                rc = kvm_s390_handle_per_event(vcpu);
                if (rc)
                        return rc;
@@ -571,6 +586,19 @@ static int handle_pv_notification(struct kvm_vcpu *vcpu)
        return handle_instruction(vcpu);
 }
 
+static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
+{
+       /* Process PER, also if the instruction is processed in user space. */
+       if (!(vcpu->arch.sie_block->icptstatus & 0x02))
+               return false;
+       if (rc != 0 && rc != -EOPNOTSUPP)
+               return false;
+       if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs)
+               /* __vcpu_run() will exit after delivering the interrupt. */
+               return false;
+       return true;
+}
+
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
        int rc, per_rc = 0;
@@ -605,8 +633,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
                rc = handle_partial_execution(vcpu);
                break;
        case ICPT_KSS:
-               rc = kvm_s390_skey_check_enable(vcpu);
-               break;
+               /* Instruction will be redriven, skip the PER check. */
+               return kvm_s390_skey_check_enable(vcpu);
        case ICPT_MCHKREQ:
        case ICPT_INT_ENABLE:
                /*
@@ -633,9 +661,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
                return -EOPNOTSUPP;
        }
 
-       /* process PER, also if the instruction is processed in user space */
-       if (vcpu->arch.sie_block->icptstatus & 0x02 &&
-           (!rc || rc == -EOPNOTSUPP))
+       if (should_handle_per_ifetch(vcpu, rc))
                per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
        return per_rc ? per_rc : rc;
 }
index 9bd0a87..85e39f4 100644 (file)
@@ -1392,6 +1392,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        int rc = 0;
+       bool delivered = false;
        unsigned long irq_type;
        unsigned long irqs;
 
@@ -1465,6 +1466,19 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
                        WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
                        clear_bit(irq_type, &li->pending_irqs);
                }
+               delivered |= !rc;
+       }
+
+       /*
+        * We delivered at least one interrupt and modified the PC. Force a
+        * singlestep event now.
+        */
+       if (delivered && guestdbg_sstep_enabled(vcpu)) {
+               struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+
+               debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
+               debug_exit->type = KVM_SINGLESTEP;
+               vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
        }
 
        set_intercept_indicators(vcpu);
index d1e768b..b3f17e0 100644 (file)
@@ -1531,6 +1531,39 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
        return 0;
 }
 
+#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK     \
+(                                              \
+       ((struct kvm_s390_vm_cpu_uv_feat){      \
+               .ap = 1,                        \
+               .ap_intr = 1,                   \
+       })                                      \
+       .feat                                   \
+)
+
+static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
+       unsigned long data, filter;
+
+       filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+       if (get_user(data, &ptr->feat))
+               return -EFAULT;
+       if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+       if (kvm->created_vcpus) {
+               mutex_unlock(&kvm->lock);
+               return -EBUSY;
+       }
+       kvm->arch.model.uv_feat_guest.feat = data;
+       mutex_unlock(&kvm->lock);
+
+       VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
+
+       return 0;
+}
+
 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
        int ret = -ENXIO;
@@ -1545,6 +1578,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
+       case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+               ret = kvm_s390_set_uv_feat(kvm, attr);
+               break;
        }
        return ret;
 }
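
Userspace opts protected guests in to the whitelisted UV features through the KVM_S390_VM_CPU_MODEL attribute group, and must do so before the first vCPU exists (otherwise -EBUSY). Below is a hedged userspace sketch: it reads the machine-level mask and writes it straight back as the guest setting, so the subset check always passes; the *_UV_FEAT_GUEST attribute names are the ones introduced earlier in this series, and error handling is minimal.

    /* Illustrative VMM snippet, not production code. Assumes s390 headers
     * that provide KVM_S390_VM_CPU_MODEL and the new UV_FEAT_GUEST
     * attributes (values 6 and 7 above).
     */
    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <stdint.h>

    static int mirror_uv_features(int vm_fd)
    {
            uint64_t feat = 0;
            struct kvm_device_attr attr = {
                    .group = KVM_S390_VM_CPU_MODEL,
                    .attr  = KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST,
                    .addr  = (uint64_t)(uintptr_t)&feat,
            };

            /* What can the ultravisor offer to protected guests? */
            if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
                    return -1;

            /* Enable exactly that set for this guest. */
            attr.attr = KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST;
            return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
    }
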
@@ -1777,6 +1813,33 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
        return 0;
 }
 
+static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+       unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
+
+       if (put_user(feat, &dst->feat))
+               return -EFAULT;
+       VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+       return 0;
+}
+
+static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+       unsigned long feat;
+
+       BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
+
+       feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+       if (put_user(feat, &dst->feat))
+               return -EFAULT;
+       VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+       return 0;
+}
+
 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
        int ret = -ENXIO;
@@ -1800,6 +1863,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
+       case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+               ret = kvm_s390_get_processor_uv_feat(kvm, attr);
+               break;
+       case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+               ret = kvm_s390_get_machine_uv_feat(kvm, attr);
+               break;
        }
        return ret;
 }
@@ -1952,6 +2021,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+               case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+               case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
                        ret = 0;
                        break;
                default:
@@ -2406,7 +2477,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
        struct kvm_vcpu *vcpu;
 
        /* Disable the GISA if the ultravisor does not support AIV. */
-       if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
+       if (!uv_has_feature(BIT_UV_FEAT_AIV))
                kvm_s390_gisa_disable(kvm);
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3296,6 +3367,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
 
+       kvm->arch.model.uv_feat_guest.feat = 0;
+
        kvm_s390_crypto_init(kvm);
 
        if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
@@ -4611,7 +4684,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                rc = kvm_s390_deliver_pending_interrupts(vcpu);
-               if (rc)
+               if (rc || guestdbg_exit_pending(vcpu))
                        return rc;
        }
 
@@ -4738,7 +4811,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
        do {
                rc = vcpu_pre_run(vcpu);
-               if (rc)
+               if (rc || guestdbg_exit_pending(vcpu))
                        break;
 
                kvm_vcpu_srcu_read_unlock(vcpu);
@@ -5383,6 +5456,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 {
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
+       int rc;
 
        switch (ioctl) {
        case KVM_S390_IRQ: {
@@ -5390,7 +5464,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 
                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
                        return -EFAULT;
-               return kvm_s390_inject_vcpu(vcpu, &s390irq);
+               rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+               break;
        }
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
@@ -5400,10 +5475,25 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
                        return -EFAULT;
                if (s390int_to_s390irq(&s390int, &s390irq))
                        return -EINVAL;
-               return kvm_s390_inject_vcpu(vcpu, &s390irq);
+               rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+               break;
        }
+       default:
+               rc = -ENOIOCTLCMD;
+               break;
        }
-       return -ENOIOCTLCMD;
+
+       /*
+        * To simplify single stepping of userspace-emulated instructions,
+        * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
+        * should_handle_per_ifetch()). However, if userspace emulation injects
+        * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
+        * after (and not before) the interrupt delivery.
+        */
+       if (!rc)
+               vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+
+       return rc;
 }
 
 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
index 0261d42..a7ea80c 100644 (file)
@@ -270,18 +270,6 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
        return vcpu->arch.pv.handle;
 }
 
-static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
-{
-       lockdep_assert_held(&kvm->lock);
-       return !!kvm_s390_pv_get_handle(kvm);
-}
-
-static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
-{
-       lockdep_assert_held(&vcpu->mutex);
-       return !!kvm_s390_pv_cpu_get_handle(vcpu);
-}
-
 /* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
index bf1fdc7..75e81ba 100644 (file)
 #include <linux/mmu_notifier.h>
 #include "kvm-s390.h"
 
+bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+       lockdep_assert_held(&kvm->lock);
+       return !!kvm_s390_pv_get_handle(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
+
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+       lockdep_assert_held(&vcpu->mutex);
+       return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
+
 /**
  * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
  * be destroyed
@@ -271,7 +285,8 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
        WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
        KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
                     uvcb.header.rc, uvcb.header.rrc);
-       WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+       WARN_ONCE(cc && uvcb.header.rc != 0x104,
+                 "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
                  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
        /* Intended memory leak on "impossible" error */
        if (!cc)
@@ -561,12 +576,14 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
        uvcb.conf_base_stor_origin =
                virt_to_phys((void *)kvm->arch.pv.stor_base);
        uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+       uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
+       uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
 
        cc = uv_call_sched(0, (u64)&uvcb);
        *rc = uvcb.header.rc;
        *rrc = uvcb.header.rrc;
-       KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
-                    uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+       KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
+                    uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
 
        /* Outputs */
        kvm->arch.pv.handle = uvcb.guest_handle;
index 2f12342..7474c20 100644 (file)
@@ -827,7 +827,7 @@ void do_secure_storage_access(struct pt_regs *regs)
         * reliable without the misc UV feature so we need to check
         * for that as well.
         */
-       if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
+       if (uv_has_feature(BIT_UV_FEAT_MISC) &&
            !test_bit_inv(61, &regs->int_parm_long)) {
                /*
                 * When this happens, userspace did something that it
index 9c8af31..906a7bf 100644 (file)
@@ -2514,6 +2514,7 @@ static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops thp_split_walk_ops = {
        .pmd_entry      = thp_split_walk_pmd_entry,
+       .walk_lock      = PGWALK_WRLOCK_VERIFY,
 };
 
 static inline void thp_split_mm(struct mm_struct *mm)
@@ -2565,6 +2566,7 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops zap_zero_walk_ops = {
        .pmd_entry      = __zap_zero_pages,
+       .walk_lock      = PGWALK_WRLOCK,
 };
 
 /*
@@ -2655,6 +2657,7 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
        .hugetlb_entry          = __s390_enable_skey_hugetlb,
        .pte_entry              = __s390_enable_skey_pte,
        .pmd_entry              = __s390_enable_skey_pmd,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 int s390_enable_skey(void)
@@ -2692,6 +2695,7 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
 
 static const struct mm_walk_ops reset_cmma_walk_ops = {
        .pte_entry              = __s390_reset_cmma,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 void s390_reset_cmma(struct mm_struct *mm)
@@ -2728,6 +2732,7 @@ static int s390_gather_pages(pte_t *ptep, unsigned long addr,
 
 static const struct mm_walk_ops gather_pages_ops = {
        .pte_entry = s390_gather_pages,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 /*
index 2667f35..0a0d5c3 100644 (file)
@@ -213,7 +213,6 @@ unsigned long __get_wchan(struct task_struct *task);
  */
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 static inline void prefetch(const void *x)
 {
@@ -239,8 +238,6 @@ static inline void prefetchw(const void *x)
                             : "r" (x));
 }
 
-#define spin_lock_prefetch(x)  prefetchw(x)
-
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
 int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap);
index 7422db4..e36261b 100644 (file)
@@ -2593,6 +2593,13 @@ config CPU_IBRS_ENTRY
          This mitigates both spectre_v2 and retbleed at great cost to
          performance.
 
+config CPU_SRSO
+       bool "Mitigate speculative RAS overflow on AMD"
+       depends on CPU_SUP_AMD && X86_64 && RETHUNK
+       default y
+       help
+         Enable the SRSO mitigation needed on AMD Zen1-4 machines.
+
 config SLS
        bool "Mitigate Straight-Line-Speculation"
        depends on CC_HAS_SLS && X86_64
@@ -2603,6 +2610,25 @@ config SLS
          against straight line speculation. The kernel image might be slightly
          larger.
 
+config GDS_FORCE_MITIGATION
+       bool "Force GDS Mitigation"
+       depends on CPU_SUP_INTEL
+       default n
+       help
+         Gather Data Sampling (GDS) is a hardware vulnerability which allows
+         unprivileged speculative access to data which was previously stored in
+         vector registers.
+
+         This option is equivalent to setting gather_data_sampling=force on the
+         command line. The microcode mitigation is used if present, otherwise
+         AVX is disabled as a mitigation. On affected systems that are missing
+         the microcode any userspace code that unconditionally uses AVX will
+         break with this option set.
+
+         Setting this option on systems not vulnerable to GDS has no effect.
+
+         If in doubt, say N.
+
 endif
 
 config ARCH_HAS_ADD_PAGES
index 6debb81..3cdf94b 100644 (file)
@@ -63,7 +63,14 @@ void load_stage2_idt(void)
        set_idt_entry(X86_TRAP_PF, boot_page_fault);
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
-       set_idt_entry(X86_TRAP_VC, boot_stage2_vc);
+       /*
+        * Clear the second stage #VC handler in case guest types
+        * needing #VC have not been detected.
+        */
+       if (sev_status & BIT(1))
+               set_idt_entry(X86_TRAP_VC, boot_stage2_vc);
+       else
+               set_idt_entry(X86_TRAP_VC, NULL);
 #endif
 
        load_boot_idt(&boot_idt_desc);
index 09dc8c1..c3e343b 100644 (file)
@@ -405,12 +405,45 @@ void sev_enable(struct boot_params *bp)
                bp->cc_blob_address = 0;
 
        /*
+        * Do an initial SEV capability check before snp_init() which
+        * loads the CPUID page and the same checks afterwards are done
+        * without the hypervisor and are trustworthy.
+        *
+        * If the HV fakes SEV support, the guest will crash'n'burn
+        * which is good enough.
+        */
+
+       /* Check for the SME/SEV support leaf */
+       eax = 0x80000000;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+       if (eax < 0x8000001f)
+               return;
+
+       /*
+        * Check for the SME/SEV feature:
+        *   CPUID Fn8000_001F[EAX]
+        *   - Bit 0 - Secure Memory Encryption support
+        *   - Bit 1 - Secure Encrypted Virtualization support
+        *   CPUID Fn8000_001F[EBX]
+        *   - Bits 5:0 - Pagetable bit position used to indicate encryption
+        */
+       eax = 0x8000001f;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+       /* Check whether SEV is supported */
+       if (!(eax & BIT(1)))
+               return;
+
+       /*
         * Setup/preliminary detection of SNP. This will be sanity-checked
         * against CPUID/MSR values later.
         */
        snp = snp_init(bp);
 
-       /* Check for the SME/SEV support leaf */
+       /* Now repeat the checks with the SNP CPUID table. */
+
+       /* Recheck the SME/SEV support leaf */
        eax = 0x80000000;
        ecx = 0;
        native_cpuid(&eax, &ebx, &ecx, &edx);
@@ -418,7 +451,7 @@ void sev_enable(struct boot_params *bp)
                return;
 
        /*
-        * Check for the SME/SEV feature:
+        * Recheck for the SME/SEV feature:
         *   CPUID Fn8000_001F[EAX]
         *   - Bit 0 - Secure Memory Encryption support
         *   - Bit 1 - Secure Encrypted Virtualization support
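
The boot stub now probes CPUID leaf 0x8000001f both before snp_init() (with plain CPUID, so a hypervisor faking SEV simply makes the guest crash early) and again afterwards through the SNP CPUID table. A standalone userspace sketch of the same probe, using the compiler's cpuid helper (x86 only):

    /* Standalone sketch of the SEV capability probe performed above. */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            __cpuid(0x80000000, eax, ebx, ecx, edx);
            if (eax < 0x8000001f) {
                    puts("no SME/SEV leaf");
                    return 0;
            }

            __cpuid(0x8000001f, eax, ebx, ecx, edx);
            printf("SME: %s, SEV: %s, C-bit position: %u\n",
                   (eax & 1) ? "yes" : "no",
                   (eax & 2) ? "yes" : "no",
                   ebx & 0x3f);
            return 0;
    }
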
index 11a5c68..7645730 100644 (file)
@@ -299,8 +299,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
 
        /* Round the lowest possible end address up to a PMD boundary. */
        end = (start + len + PMD_SIZE - 1) & PMD_MASK;
-       if (end >= TASK_SIZE_MAX)
-               end = TASK_SIZE_MAX;
+       if (end >= DEFAULT_MAP_WINDOW)
+               end = DEFAULT_MAP_WINDOW;
        end -= len;
 
        if (end > start) {
index 8eb74cf..2888c0e 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/mpspec.h>
 #include <asm/x86_init.h>
 #include <asm/cpufeature.h>
+#include <asm/irq_vectors.h>
 
 #ifdef CONFIG_ACPI_APEI
 # include <asm/pgtable_types.h>
@@ -31,6 +32,7 @@ extern int acpi_skip_timer_override;
 extern int acpi_use_timer_override;
 extern int acpi_fix_pin2_polarity;
 extern int acpi_disable_cmcff;
+extern bool acpi_int_src_ovr[NR_IRQS_LEGACY];
 
 extern u8 acpi_sci_flags;
 extern u32 acpi_sci_override_gsi;
index cb8ca46..b69b0d7 100644 (file)
@@ -14,7 +14,7 @@
  * Defines x86 CPU feature bits
  */
 #define NCAPINTS                       21         /* N 32-bit words worth of info */
-#define NBUGINTS                       1          /* N 32-bit bug flags */
+#define NBUGINTS                       2          /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
 #define X86_FEATURE_SMBA               (11*32+21) /* "" Slow Memory Bandwidth Allocation */
 #define X86_FEATURE_BMEC               (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
 
+#define X86_FEATURE_SRSO               (11*32+24) /* "" AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS         (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT     (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16                (12*32+ 5) /* AVX512 BFLOAT16 instructions */
 #define X86_FEATURE_AUTOIBRS           (20*32+ 8) /* "" Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR     (20*32+ 9) /* "" SMM_CTL MSR is not present */
 
+#define X86_FEATURE_SBPB               (20*32+27) /* "" Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE                (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO            (20*32+29) /* "" CPU is not affected by SRSO */
+
 /*
  * BUG word(s)
  */
 #define X86_BUG_RETBLEED               X86_BUG(27) /* CPU is affected by RETBleed */
 #define X86_BUG_EIBRS_PBRSB            X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
 #define X86_BUG_SMT_RSB                        X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS                    X86_BUG(30) /* CPU is affected by Gather Data Sampling */
 
+/* BUG word 2 */
+#define X86_BUG_SRSO                   X86_BUG(1*32 + 0) /* AMD SRSO bug */
+#define X86_BUG_DIV0                   X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #endif /* _ASM_X86_CPUFEATURES_H */
index 1179038..ce8f501 100644 (file)
@@ -92,6 +92,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 static __always_inline void arch_exit_to_user_mode(void)
 {
        mds_user_clear_cpu_buffers();
+       amd_clear_divider();
 }
 #define arch_exit_to_user_mode arch_exit_to_user_mode
 
index 3bc146d..6523f54 100644 (file)
@@ -1795,8 +1795,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
 #define __KVM_HAVE_ARCH_VM_FREE
 void kvm_arch_free_vm(struct kvm *kvm);
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        if (kvm_x86_ops.flush_remote_tlbs &&
            !static_call(kvm_x86_flush_remote_tlbs)(kvm))
@@ -1805,6 +1805,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
                return -ENOTSUPP;
 }
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 #define kvm_arch_pmi_in_guest(vcpu) \
        ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
 
index 0953aa3..97a3de7 100644 (file)
@@ -21,7 +21,7 @@
 #define FUNCTION_PADDING
 #endif
 
-#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO)
+#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
 # define __FUNC_ALIGN          __ALIGN; FUNCTION_PADDING
 #else
 # define __FUNC_ALIGN          __ALIGN
index a00a53e..1d11135 100644 (file)
@@ -57,6 +57,7 @@
 
 #define MSR_IA32_PRED_CMD              0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB                  BIT(0)     /* Indirect Branch Prediction Barrier */
+#define PRED_CMD_SBPB                  BIT(7)     /* Selective Branch Prediction Barrier */
 
 #define MSR_PPIN_CTL                   0x0000004e
 #define MSR_PPIN                       0x0000004f
                                                 * Not susceptible to Post-Barrier
                                                 * Return Stack Buffer Predictions.
                                                 */
+#define ARCH_CAP_GDS_CTRL              BIT(25) /*
+                                                * CPU is vulnerable to Gather
+                                                * Data Sampling (GDS) and
+                                                * has controls for mitigation.
+                                                */
+#define ARCH_CAP_GDS_NO                        BIT(26) /*
+                                                * CPU is not vulnerable to Gather
+                                                * Data Sampling (GDS).
+                                                */
 
 #define ARCH_CAP_XAPIC_DISABLE         BIT(21) /*
                                                 * IA32_XAPIC_DISABLE_STATUS MSR
 #define RNGDS_MITG_DIS                 BIT(0)  /* SRBDS support */
 #define RTM_ALLOW                      BIT(1)  /* TSX development mode */
 #define FB_CLEAR_DIS                   BIT(3)  /* CPU Fill buffer clear disable */
+#define GDS_MITG_DIS                   BIT(4)  /* Disable GDS mitigation */
+#define GDS_MITG_LOCKED                        BIT(5)  /* GDS mitigation locked */
 
 #define MSR_IA32_SYSENTER_CS           0x00000174
 #define MSR_IA32_SYSENTER_ESP          0x00000175
index 1a65cf4..c55cc24 100644 (file)
  * eventually turn into it's own annotation.
  */
 .macro VALIDATE_UNRET_END
-#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY)
+#if defined(CONFIG_NOINSTR_VALIDATION) && \
+       (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
        ANNOTATE_RETPOLINE_SAFE
        nop
 #endif
 .endm
 
 #ifdef CONFIG_CPU_UNRET_ENTRY
-#define CALL_ZEN_UNTRAIN_RET   "call zen_untrain_ret"
+#define CALL_UNTRAIN_RET       "call entry_untrain_ret"
 #else
-#define CALL_ZEN_UNTRAIN_RET   ""
+#define CALL_UNTRAIN_RET       ""
 #endif
 
 /*
  * return thunk isn't mapped into the userspace tables (then again, AMD
  * typically has NO_MELTDOWN).
  *
- * While zen_untrain_ret() doesn't clobber anything but requires stack,
+ * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
  * entry_ibpb() will clobber AX, CX, DX.
  *
  * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
  */
 .macro UNTRAIN_RET
 #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
-       defined(CONFIG_CALL_DEPTH_TRACKING)
+       defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
        VALIDATE_UNRET_END
        ALTERNATIVE_3 "",                                               \
-                     CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,          \
+                     CALL_UNTRAIN_RET, X86_FEATURE_UNRET,              \
                      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,        \
                      __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
 #endif
 .endm
 
+.macro UNTRAIN_RET_VM
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+       defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
+       VALIDATE_UNRET_END
+       ALTERNATIVE_3 "",                                               \
+                     CALL_UNTRAIN_RET, X86_FEATURE_UNRET,              \
+                     "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT,    \
+                     __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
 .macro UNTRAIN_RET_FROM_CALL
 #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
        defined(CONFIG_CALL_DEPTH_TRACKING)
        VALIDATE_UNRET_END
        ALTERNATIVE_3 "",                                               \
-                     CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,          \
+                     CALL_UNTRAIN_RET, X86_FEATURE_UNRET,              \
                      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,        \
                      __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
 #endif
@@ -330,15 +342,24 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
 extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
 extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
 
+#ifdef CONFIG_RETHUNK
 extern void __x86_return_thunk(void);
-extern void zen_untrain_ret(void);
+#else
+static inline void __x86_return_thunk(void) {}
+#endif
+
+extern void retbleed_return_thunk(void);
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
+
+extern void retbleed_untrain_ret(void);
+extern void srso_untrain_ret(void);
+extern void srso_alias_untrain_ret(void);
+
+extern void entry_untrain_ret(void);
 extern void entry_ibpb(void);
 
-#ifdef CONFIG_CALL_THUNKS
 extern void (*x86_return_thunk)(void);
-#else
-#define x86_return_thunk       (&__x86_return_thunk)
-#endif
 
 #ifdef CONFIG_CALL_DEPTH_TRACKING
 extern void __x86_return_skl(void);
@@ -465,9 +486,6 @@ enum ssb_mitigation {
        SPEC_STORE_BYPASS_SECCOMP,
 };
 
-extern char __indirect_thunk_start[];
-extern char __indirect_thunk_end[];
-
 static __always_inline
 void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 {
@@ -479,11 +497,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
                : "memory");
 }
 
+extern u64 x86_pred_cmd;
+
 static inline void indirect_branch_prediction_barrier(void)
 {
-       u64 val = PRED_CMD_IBPB;
-
-       alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
+       alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
 }
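
With x86_pred_cmd in place, the barrier writes whichever command word was chosen at boot: full IBPB by default, or the lighter SBPB when SRSO does not require a full barrier. A minimal sketch of that selection, using illustrative names rather than the kernel's internal state:

	/* Sketch only: mirrors how bugs.c picks the MSR_IA32_PRED_CMD payload. */
	#define SKETCH_PRED_CMD_IBPB  (1UL << 0)
	#define SKETCH_PRED_CMD_SBPB  (1UL << 7)

	static unsigned long pick_pred_cmd(int has_sbpb, int srso_needs_full_ibpb)
	{
		/* SBPB flushes only branch-type predictions, so it is enough
		 * only when SRSO does not demand a full IBPB. */
		if (has_sbpb && !srso_needs_full_ibpb)
			return SKETCH_PRED_CMD_SBPB;
		return SKETCH_PRED_CMD_IBPB;
	}
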
 
 /* The Intel SPEC CTRL MSR base value cache */
index d46300e..fd75024 100644 (file)
@@ -586,7 +586,6 @@ extern char                 ignore_fpu_irq;
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 #ifdef CONFIG_X86_32
 # define BASE_PREFETCH         ""
@@ -620,11 +619,6 @@ static __always_inline void prefetchw(const void *x)
                          "m" (*(const char *)x));
 }
 
-static inline void spin_lock_prefetch(const void *x)
-{
-       prefetchw(x);
-}
-
 #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
                           TOP_OF_KERNEL_STACK_PADDING)
 
@@ -682,9 +676,13 @@ extern u16 get_llc_id(unsigned int cpu);
 #ifdef CONFIG_CPU_SUP_AMD
 extern u32 amd_get_nodes_per_socket(void);
 extern u32 amd_get_highest_perf(void);
+extern bool cpu_has_ibpb_brtype_microcode(void);
+extern void amd_clear_divider(void);
 #else
 static inline u32 amd_get_nodes_per_socket(void)       { return 0; }
 static inline u32 amd_get_highest_perf(void)           { return 0; }
+static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; }
+static inline void amd_clear_divider(void)             { }
 #endif
 
 extern unsigned long arch_align_stack(unsigned long sp);
@@ -727,4 +725,6 @@ bool arch_is_platform_page(u64 paddr);
 #define arch_is_platform_page arch_is_platform_page
 #endif
 
+extern bool gds_ucode_mitigated(void);
+
 #endif /* _ASM_X86_PROCESSOR_H */
index 794f696..9d6411c 100644 (file)
@@ -56,7 +56,7 @@
 
 #define GDT_ENTRY_INVALID_SEG  0
 
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64)
 /*
  * The layout of the per-CPU GDT under Linux:
  *
index 21b542a..53369c5 100644 (file)
@@ -52,6 +52,7 @@ int acpi_lapic;
 int acpi_ioapic;
 int acpi_strict;
 int acpi_disable_cmcff;
+bool acpi_int_src_ovr[NR_IRQS_LEGACY];
 
 /* ACPI SCI override configuration */
 u8 acpi_sci_flags __initdata;
@@ -588,6 +589,9 @@ acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
 
        acpi_table_print_madt_entry(&header->common);
 
+       if (intsrc->source_irq < NR_IRQS_LEGACY)
+               acpi_int_src_ovr[intsrc->source_irq] = true;
+
        if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) {
                acpi_sci_ioapic_setup(intsrc->source_irq,
                                      intsrc->inti_flags & ACPI_MADT_POLARITY_MASK,
index 2dcf3a0..099d58d 100644 (file)
@@ -687,10 +687,6 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
 
 #ifdef CONFIG_RETHUNK
 
-#ifdef CONFIG_CALL_THUNKS
-void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
-#endif
-
 /*
  * Rewrite the compiler generated return thunk tail-calls.
  *
index 26ad7ca..7eca6a8 100644 (file)
@@ -73,8 +73,13 @@ static const int amd_erratum_1054[] =
 static const int amd_zenbleed[] =
        AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
                           AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
+                          AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf),
                           AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
 
+static const int amd_div0[] =
+       AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
+                          AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+
 static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
 {
        int osvw_id = *erratum++;
@@ -1130,6 +1135,11 @@ static void init_amd(struct cpuinfo_x86 *c)
                WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
 
        zenbleed_check(c);
+
+       if (cpu_has_amd_erratum(c, amd_div0)) {
+               pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+               setup_force_cpu_bug(X86_BUG_DIV0);
+       }
 }
 
 #ifdef CONFIG_X86_32
@@ -1290,3 +1300,33 @@ void amd_check_microcode(void)
 {
        on_each_cpu(zenbleed_check_cpu, NULL, 1);
 }
+
+bool cpu_has_ibpb_brtype_microcode(void)
+{
+       switch (boot_cpu_data.x86) {
+       /* Zen1/2 IBPB flushes branch type predictions too. */
+       case 0x17:
+               return boot_cpu_has(X86_FEATURE_AMD_IBPB);
+       case 0x19:
+               /* Poke the MSR bit on Zen3/4 to check its presence. */
+               if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) {
+                       setup_force_cpu_cap(X86_FEATURE_SBPB);
+                       return true;
+               } else {
+                       return false;
+               }
+       default:
+               return false;
+       }
+}
+
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+void noinstr amd_clear_divider(void)
+{
+       asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+                    :: "a" (0), "d" (0), "r" (1));
+}
+EXPORT_SYMBOL_GPL(amd_clear_divider);
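
Stripped of the ALTERNATIVE patching, the helper above simply executes one harmless division (0 divided by 1) so that stale operands cannot linger in the divide unit. A rough standalone equivalent, shown only to make the instruction sequence explicit:

	static inline void clear_divider_sketch(void)
	{
		unsigned int lo = 0, hi = 0;

		/* divl divides EDX:EAX by the operand; 0 / 1 leaves 0 everywhere
		 * while overwriting whatever the previous DIV left behind. */
		asm volatile("divl %2"
			     : "+a" (lo), "+d" (hi)
			     : "r" (1U)
			     : "cc");
	}
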
index 9550744..f081d26 100644 (file)
@@ -47,6 +47,8 @@ static void __init taa_select_mitigation(void);
 static void __init mmio_select_mitigation(void);
 static void __init srbds_select_mitigation(void);
 static void __init l1d_flush_select_mitigation(void);
+static void __init srso_select_mitigation(void);
+static void __init gds_select_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR without task-specific bits set */
 u64 x86_spec_ctrl_base;
@@ -56,8 +58,13 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
 DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
 
+u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
+EXPORT_SYMBOL_GPL(x86_pred_cmd);
+
 static DEFINE_MUTEX(spec_ctrl_mutex);
 
+void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+
 /* Update SPEC_CTRL MSR and its cached copy unconditionally */
 static void update_spec_ctrl(u64 val)
 {
@@ -160,6 +167,13 @@ void __init cpu_select_mitigations(void)
        md_clear_select_mitigation();
        srbds_select_mitigation();
        l1d_flush_select_mitigation();
+
+       /*
+        * srso_select_mitigation() depends on and must run after
+        * retbleed_select_mitigation().
+        */
+       srso_select_mitigation();
+       gds_select_mitigation();
 }
 
 /*
@@ -646,6 +660,149 @@ static int __init l1d_flush_parse_cmdline(char *str)
 early_param("l1d_flush", l1d_flush_parse_cmdline);
 
 #undef pr_fmt
+#define pr_fmt(fmt)    "GDS: " fmt
+
+enum gds_mitigations {
+       GDS_MITIGATION_OFF,
+       GDS_MITIGATION_UCODE_NEEDED,
+       GDS_MITIGATION_FORCE,
+       GDS_MITIGATION_FULL,
+       GDS_MITIGATION_FULL_LOCKED,
+       GDS_MITIGATION_HYPERVISOR,
+};
+
+#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION)
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE;
+#else
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL;
+#endif
+
+static const char * const gds_strings[] = {
+       [GDS_MITIGATION_OFF]            = "Vulnerable",
+       [GDS_MITIGATION_UCODE_NEEDED]   = "Vulnerable: No microcode",
+       [GDS_MITIGATION_FORCE]          = "Mitigation: AVX disabled, no microcode",
+       [GDS_MITIGATION_FULL]           = "Mitigation: Microcode",
+       [GDS_MITIGATION_FULL_LOCKED]    = "Mitigation: Microcode (locked)",
+       [GDS_MITIGATION_HYPERVISOR]     = "Unknown: Dependent on hypervisor status",
+};
+
+bool gds_ucode_mitigated(void)
+{
+       return (gds_mitigation == GDS_MITIGATION_FULL ||
+               gds_mitigation == GDS_MITIGATION_FULL_LOCKED);
+}
+EXPORT_SYMBOL_GPL(gds_ucode_mitigated);
+
+void update_gds_msr(void)
+{
+       u64 mcu_ctrl_after;
+       u64 mcu_ctrl;
+
+       switch (gds_mitigation) {
+       case GDS_MITIGATION_OFF:
+               rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+               mcu_ctrl |= GDS_MITG_DIS;
+               break;
+       case GDS_MITIGATION_FULL_LOCKED:
+               /*
+                * The LOCKED state comes from the boot CPU. APs might not have
+                * the same state. Make sure the mitigation is enabled on all
+                * CPUs.
+                */
+       case GDS_MITIGATION_FULL:
+               rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+               mcu_ctrl &= ~GDS_MITG_DIS;
+               break;
+       case GDS_MITIGATION_FORCE:
+       case GDS_MITIGATION_UCODE_NEEDED:
+       case GDS_MITIGATION_HYPERVISOR:
+               return;
+       };
+
+       wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+
+       /*
+        * Check to make sure that the WRMSR value was not ignored. Writes to
+        * GDS_MITG_DIS will be ignored if this processor is locked but the boot
+        * processor was not.
+        */
+       rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after);
+       WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after);
+}
+
+static void __init gds_select_mitigation(void)
+{
+       u64 mcu_ctrl;
+
+       if (!boot_cpu_has_bug(X86_BUG_GDS))
+               return;
+
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+               gds_mitigation = GDS_MITIGATION_HYPERVISOR;
+               goto out;
+       }
+
+       if (cpu_mitigations_off())
+               gds_mitigation = GDS_MITIGATION_OFF;
+       /* Will verify below that mitigation _can_ be disabled */
+
+       /* No microcode */
+       if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+               if (gds_mitigation == GDS_MITIGATION_FORCE) {
+                       /*
+                        * This only needs to be done on the boot CPU so do it
+                        * here rather than in update_gds_msr()
+                        */
+                       setup_clear_cpu_cap(X86_FEATURE_AVX);
+                       pr_warn("Microcode update needed! Disabling AVX as mitigation.\n");
+               } else {
+                       gds_mitigation = GDS_MITIGATION_UCODE_NEEDED;
+               }
+               goto out;
+       }
+
+       /* Microcode has mitigation, use it */
+       if (gds_mitigation == GDS_MITIGATION_FORCE)
+               gds_mitigation = GDS_MITIGATION_FULL;
+
+       rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+       if (mcu_ctrl & GDS_MITG_LOCKED) {
+               if (gds_mitigation == GDS_MITIGATION_OFF)
+                       pr_warn("Mitigation locked. Disable failed.\n");
+
+               /*
+                * The mitigation is selected from the boot CPU. All other CPUs
+                * _should_ have the same state. If the boot CPU isn't locked
+                * but others are then update_gds_msr() will WARN() of the state
+                * mismatch. If the boot CPU is locked update_gds_msr() will
+                * ensure the other CPUs have the mitigation enabled.
+                */
+               gds_mitigation = GDS_MITIGATION_FULL_LOCKED;
+       }
+
+       update_gds_msr();
+out:
+       pr_info("%s\n", gds_strings[gds_mitigation]);
+}
+
+static int __init gds_parse_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (!boot_cpu_has_bug(X86_BUG_GDS))
+               return 0;
+
+       if (!strcmp(str, "off"))
+               gds_mitigation = GDS_MITIGATION_OFF;
+       else if (!strcmp(str, "force"))
+               gds_mitigation = GDS_MITIGATION_FORCE;
+
+       return 0;
+}
+early_param("gather_data_sampling", gds_parse_cmdline);
+
+#undef pr_fmt
 #define pr_fmt(fmt)     "Spectre V1 : " fmt
 
 enum spectre_v1_mitigation {
@@ -885,6 +1042,9 @@ do_cmd_auto:
                setup_force_cpu_cap(X86_FEATURE_RETHUNK);
                setup_force_cpu_cap(X86_FEATURE_UNRET);
 
+               if (IS_ENABLED(CONFIG_RETHUNK))
+                       x86_return_thunk = retbleed_return_thunk;
+
                if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
                    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
                        pr_err(RETBLEED_UNTRAIN_MSG);
@@ -894,6 +1054,7 @@ do_cmd_auto:
 
        case RETBLEED_MITIGATION_IBPB:
                setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+               setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
                mitigate_smt = true;
                break;
 
@@ -2188,6 +2349,170 @@ static int __init l1tf_cmdline(char *str)
 early_param("l1tf", l1tf_cmdline);
 
 #undef pr_fmt
+#define pr_fmt(fmt)    "Speculative Return Stack Overflow: " fmt
+
+enum srso_mitigation {
+       SRSO_MITIGATION_NONE,
+       SRSO_MITIGATION_MICROCODE,
+       SRSO_MITIGATION_SAFE_RET,
+       SRSO_MITIGATION_IBPB,
+       SRSO_MITIGATION_IBPB_ON_VMEXIT,
+};
+
+enum srso_mitigation_cmd {
+       SRSO_CMD_OFF,
+       SRSO_CMD_MICROCODE,
+       SRSO_CMD_SAFE_RET,
+       SRSO_CMD_IBPB,
+       SRSO_CMD_IBPB_ON_VMEXIT,
+};
+
+static const char * const srso_strings[] = {
+       [SRSO_MITIGATION_NONE]           = "Vulnerable",
+       [SRSO_MITIGATION_MICROCODE]      = "Mitigation: microcode",
+       [SRSO_MITIGATION_SAFE_RET]       = "Mitigation: safe RET",
+       [SRSO_MITIGATION_IBPB]           = "Mitigation: IBPB",
+       [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only"
+};
+
+static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE;
+static enum srso_mitigation_cmd srso_cmd __ro_after_init = SRSO_CMD_SAFE_RET;
+
+static int __init srso_parse_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (!strcmp(str, "off"))
+               srso_cmd = SRSO_CMD_OFF;
+       else if (!strcmp(str, "microcode"))
+               srso_cmd = SRSO_CMD_MICROCODE;
+       else if (!strcmp(str, "safe-ret"))
+               srso_cmd = SRSO_CMD_SAFE_RET;
+       else if (!strcmp(str, "ibpb"))
+               srso_cmd = SRSO_CMD_IBPB;
+       else if (!strcmp(str, "ibpb-vmexit"))
+               srso_cmd = SRSO_CMD_IBPB_ON_VMEXIT;
+       else
+               pr_err("Ignoring unknown SRSO option (%s).\n", str);
+
+       return 0;
+}
+early_param("spec_rstack_overflow", srso_parse_cmdline);
+
+#define SRSO_NOTICE "WARNING: See https://kernel.org/doc/html/latest/admin-guide/hw-vuln/srso.html for mitigation options."
+
+static void __init srso_select_mitigation(void)
+{
+       bool has_microcode;
+
+       if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off())
+               goto pred_cmd;
+
+       /*
+        * The first check is for the kernel running as a guest in order
+        * for guests to verify whether IBPB is a viable mitigation.
+        */
+       has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode();
+       if (!has_microcode) {
+               pr_warn("IBPB-extending microcode not applied!\n");
+               pr_warn(SRSO_NOTICE);
+       } else {
+               /*
+                * Enable the synthetic (even if in a real CPUID leaf)
+                * flags for guests.
+                */
+               setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
+
+               /*
+                * Zen1/2 with SMT off aren't vulnerable after the right
+                * IBPB microcode has been applied.
+                */
+               if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) {
+                       setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
+                       return;
+               }
+       }
+
+       if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
+               if (has_microcode) {
+                       pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n");
+                       srso_mitigation = SRSO_MITIGATION_IBPB;
+                       goto pred_cmd;
+               }
+       }
+
+       switch (srso_cmd) {
+       case SRSO_CMD_OFF:
+               return;
+
+       case SRSO_CMD_MICROCODE:
+               if (has_microcode) {
+                       srso_mitigation = SRSO_MITIGATION_MICROCODE;
+                       pr_warn(SRSO_NOTICE);
+               }
+               break;
+
+       case SRSO_CMD_SAFE_RET:
+               if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+                       /*
+                        * Enable the return thunk for generated code
+                        * like ftrace, static_call, etc.
+                        */
+                       setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+                       setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+                       if (boot_cpu_data.x86 == 0x19) {
+                               setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
+                               x86_return_thunk = srso_alias_return_thunk;
+                       } else {
+                               setup_force_cpu_cap(X86_FEATURE_SRSO);
+                               x86_return_thunk = srso_return_thunk;
+                       }
+                       srso_mitigation = SRSO_MITIGATION_SAFE_RET;
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+                       goto pred_cmd;
+               }
+               break;
+
+       case SRSO_CMD_IBPB:
+               if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+                       if (has_microcode) {
+                               setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+                               srso_mitigation = SRSO_MITIGATION_IBPB;
+                       }
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+                       goto pred_cmd;
+               }
+               break;
+
+       case SRSO_CMD_IBPB_ON_VMEXIT:
+               if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+                       if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+                               setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+                               srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
+                       }
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+                       goto pred_cmd;
+               }
+               break;
+
+       default:
+               break;
+       }
+
+       pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode"));
+
+pred_cmd:
+       if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) &&
+            boot_cpu_has(X86_FEATURE_SBPB))
+               x86_pred_cmd = PRED_CMD_SBPB;
+}
+
+#undef pr_fmt
 #define pr_fmt(fmt) fmt
 
 #ifdef CONFIG_SYSFS
@@ -2385,6 +2710,21 @@ static ssize_t retbleed_show_state(char *buf)
        return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
 }
 
+static ssize_t srso_show_state(char *buf)
+{
+       if (boot_cpu_has(X86_FEATURE_SRSO_NO))
+               return sysfs_emit(buf, "Mitigation: SMT disabled\n");
+
+       return sysfs_emit(buf, "%s%s\n",
+                         srso_strings[srso_mitigation],
+                         (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode"));
+}
+
+static ssize_t gds_show_state(char *buf)
+{
+       return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
+}
+
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
                               char *buf, unsigned int bug)
 {
@@ -2434,6 +2774,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
        case X86_BUG_RETBLEED:
                return retbleed_show_state(buf);
 
+       case X86_BUG_SRSO:
+               return srso_show_state(buf);
+
+       case X86_BUG_GDS:
+               return gds_show_state(buf);
+
        default:
                break;
        }
@@ -2498,4 +2844,14 @@ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, cha
 {
        return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
 }
+
+ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_SRSO);
+}
+
+ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_GDS);
+}
 #endif
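
The two new cpu_show_*() handlers expose the SRSO and GDS states alongside the existing vulnerability files, complementing the spec_rstack_overflow= and gather_data_sampling= command-line knobs parsed above. Assuming the sysfs entries added elsewhere in this series carry the same names, a quick user-space check might look like this (illustrative only):

	#include <stdio.h>

	static void show(const char *name)
	{
		char path[256], line[256];
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/vulnerabilities/%s", name);
		f = fopen(path, "r");
		if (f && fgets(line, sizeof(line), f))
			printf("%s: %s", name, line);
		if (f)
			fclose(f);
	}

	int main(void)
	{
		show("spec_rstack_overflow");
		show("gather_data_sampling");
		return 0;
	}
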
index 0ba1067..e3a65e9 100644 (file)
@@ -1250,6 +1250,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 #define RETBLEED       BIT(3)
 /* CPU is affected by SMT (cross-thread) return predictions */
 #define SMT_RSB                BIT(4)
+/* CPU is affected by SRSO */
+#define SRSO           BIT(5)
+/* CPU is affected by GDS */
+#define GDS            BIT(6)
 
 static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
        VULNBL_INTEL_STEPPINGS(IVYBRIDGE,       X86_STEPPING_ANY,               SRBDS),
@@ -1262,27 +1266,30 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
        VULNBL_INTEL_STEPPINGS(BROADWELL_X,     X86_STEPPING_ANY,               MMIO),
        VULNBL_INTEL_STEPPINGS(BROADWELL,       X86_STEPPING_ANY,               SRBDS),
        VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       X86_STEPPING_ANY,               MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       X86_STEPPING_ANY,               MMIO | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED | GDS),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(CANNONLAKE_L,    X86_STEPPING_ANY,               RETBLEED),
-       VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO),
-       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO),
-       VULNBL_INTEL_STEPPINGS(COMETLAKE,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED | GDS),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO | GDS),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO | GDS),
+       VULNBL_INTEL_STEPPINGS(COMETLAKE,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x0, 0x0),        MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED | GDS),
+       VULNBL_INTEL_STEPPINGS(TIGERLAKE_L,     X86_STEPPING_ANY,               GDS),
+       VULNBL_INTEL_STEPPINGS(TIGERLAKE,       X86_STEPPING_ANY,               GDS),
        VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPING_ANY,               MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPING_ANY,               MMIO | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
        VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,  X86_STEPPING_ANY,               MMIO),
        VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
 
        VULNBL_AMD(0x15, RETBLEED),
        VULNBL_AMD(0x16, RETBLEED),
-       VULNBL_AMD(0x17, RETBLEED | SMT_RSB),
+       VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
        VULNBL_HYGON(0x18, RETBLEED | SMT_RSB),
+       VULNBL_AMD(0x19, SRSO),
        {}
 };
 
@@ -1406,6 +1413,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
        if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
                setup_force_cpu_bug(X86_BUG_SMT_RSB);
 
+       if (!cpu_has(c, X86_FEATURE_SRSO_NO)) {
+               if (cpu_matches(cpu_vuln_blacklist, SRSO))
+                       setup_force_cpu_bug(X86_BUG_SRSO);
+       }
+
+       /*
+        * Check if CPU is vulnerable to GDS. If running in a virtual machine on
+        * an affected processor, the VMM may have disabled the use of GATHER by
+        * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
+        * which means that AVX will be disabled.
+        */
+       if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+           boot_cpu_has(X86_FEATURE_AVX))
+               setup_force_cpu_bug(X86_BUG_GDS);
+
        if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
                return;
 
@@ -1962,6 +1984,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
        validate_apic_and_package_id(c);
        x86_spec_ctrl_setup_ap();
        update_srbds_msr();
+       if (boot_cpu_has_bug(X86_BUG_GDS))
+               update_gds_msr();
 
        tsx_ap_init();
 }
index 1c44630..1dcd7d4 100644 (file)
@@ -83,6 +83,7 @@ void cpu_select_mitigations(void);
 
 extern void x86_spec_ctrl_setup_ap(void);
 extern void update_srbds_msr(void);
+extern void update_gds_msr(void);
 
 extern enum spectre_v2_mitigation spectre_v2_enabled;
 
index af5cbdd..f6d856b 100644 (file)
@@ -19,8 +19,7 @@
  * FPU state for a task MUST let the rest of the kernel know that the
  * FPU registers are no longer valid for this task.
  *
- * Either one of these invalidation functions is enough. Invalidate
- * a resource you control: CPU if using the CPU for something else
+ * Invalidate a resource you control: CPU if using the CPU for something else
  * (with preemption disabled), FPU for the current task, or a task that
  * is prevented from running by the current task.
  */
index 1015af1..98e507c 100644 (file)
@@ -679,7 +679,7 @@ static void fpu_reset_fpregs(void)
        struct fpu *fpu = &current->thread.fpu;
 
        fpregs_lock();
-       fpu__drop(fpu);
+       __fpu_invalidate_fpregs_state(fpu);
        /*
         * This does not change the actual hardware registers. It just
         * resets the memory image and sets TIF_NEED_FPU_LOAD so a
index 0bab497..1afbc48 100644 (file)
@@ -882,6 +882,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
                goto out_disable;
        }
 
+       /*
+        * CPU capabilities initialization runs before FPU init. So
+        * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
+        * functional, set the feature bit so code depending on it works.
+        */
+       setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
+
        print_xstate_offset_size();
        pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
                fpu_kernel_cfg.max_features,
index 57b0037..517821b 100644 (file)
@@ -226,7 +226,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
 }
 
 /* Check whether insn is indirect jump */
-static int __insn_is_indirect_jump(struct insn *insn)
+static int insn_is_indirect_jump(struct insn *insn)
 {
        return ((insn->opcode.bytes[0] == 0xff &&
                (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -260,26 +260,6 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
        return (start <= target && target <= start + len);
 }
 
-static int insn_is_indirect_jump(struct insn *insn)
-{
-       int ret = __insn_is_indirect_jump(insn);
-
-#ifdef CONFIG_RETPOLINE
-       /*
-        * Jump to x86_indirect_thunk_* is treated as an indirect jump.
-        * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
-        * older gcc may use indirect jump. So we add this check instead of
-        * replace indirect-jump check.
-        */
-       if (!ret)
-               ret = insn_jump_into_range(insn,
-                               (unsigned long)__indirect_thunk_start,
-                               (unsigned long)__indirect_thunk_end -
-                               (unsigned long)__indirect_thunk_start);
-#endif
-       return ret;
-}
-
 /* Decode whole function to ensure any instructions don't jump into target */
 static int can_optimize(unsigned long paddr)
 {
@@ -334,9 +314,21 @@ static int can_optimize(unsigned long paddr)
                /* Recover address */
                insn.kaddr = (void *)addr;
                insn.next_byte = (void *)(addr + insn.length);
-               /* Check any instructions don't jump into target */
-               if (insn_is_indirect_jump(&insn) ||
-                   insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
+               /*
+                * Check any instructions don't jump into target, indirectly or
+                * directly.
+                *
+                * The indirect case is present to handle a code with jump
+                * tables. When the kernel uses retpolines, the check should in
+                * theory additionally look for jumps to indirect thunks.
+                * However, the kernel built with retpolines or IBT has jump
+                * tables disabled so the check can be skipped altogether.
+                */
+               if (!IS_ENABLED(CONFIG_RETPOLINE) &&
+                   !IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
+                   insn_is_indirect_jump(&insn))
+                       return 0;
+               if (insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
                                         DISP32_SIZE))
                        return 0;
                addr += insn.length;
index b70670a..77a9316 100644 (file)
@@ -186,6 +186,19 @@ EXPORT_SYMBOL_GPL(arch_static_call_transform);
  */
 bool __static_call_fixup(void *tramp, u8 op, void *dest)
 {
+       unsigned long addr = (unsigned long)tramp;
+       /*
+        * Not all .return_sites are static_call trampolines (most are not).
+        * Check if the 3 bytes after the return are still kernel text, if not,
+        * then this definitely is not a trampoline and we need not worry
+        * further.
+        *
+        * This avoids the memcmp() below tripping over pagefaults etc.
+        */
+       if (((addr >> PAGE_SHIFT) != ((addr + 7) >> PAGE_SHIFT)) &&
+           !kernel_text_address(addr + 7))
+               return false;
+
        if (memcmp(tramp+5, tramp_ud, 3)) {
                /* Not a trampoline site, not our problem. */
                return false;
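
The fast-path guard above only falls back to kernel_text_address() when tramp + 7, the last byte read by the memcmp() over tramp+5..tramp+7, sits in a different page than tramp itself. A tiny self-check of that condition, assuming 4K pages:

	#include <assert.h>
	#include <stdint.h>

	#define SKETCH_PAGE_SHIFT 12

	static int crosses_page(uint64_t addr)
	{
		return (addr >> SKETCH_PAGE_SHIFT) != ((addr + 7) >> SKETCH_PAGE_SHIFT);
	}

	int main(void)
	{
		assert(!crosses_page(0xffffffff81000ff0ULL));	/* +7 stays in the same 4K page */
		assert( crosses_page(0xffffffff81000ffaULL));	/* +7 spills into the next page */
		return 0;
	}
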
index 03c885d..83d41c2 100644 (file)
@@ -133,14 +133,26 @@ SECTIONS
                KPROBES_TEXT
                SOFTIRQENTRY_TEXT
 #ifdef CONFIG_RETPOLINE
-               __indirect_thunk_start = .;
-               *(.text.__x86.*)
-               __indirect_thunk_end = .;
+               *(.text..__x86.indirect_thunk)
+               *(.text..__x86.return_thunk)
 #endif
                STATIC_CALL_TEXT
 
                ALIGN_ENTRY_TEXT_BEGIN
+#ifdef CONFIG_CPU_SRSO
+               *(.text..__x86.rethunk_untrain)
+#endif
+
                ENTRY_TEXT
+
+#ifdef CONFIG_CPU_SRSO
+               /*
+                * See the comment above srso_alias_untrain_ret()'s
+                * definition.
+                */
+               . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
+               *(.text..__x86.rethunk_safe)
+#endif
                ALIGN_ENTRY_TEXT_END
                *(.gnu.warning)
 
@@ -509,7 +521,24 @@ INIT_PER_CPU(irq_stack_backing_store);
 #endif
 
 #ifdef CONFIG_RETHUNK
-. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned");
+. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
+. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
+#endif
+
+#ifdef CONFIG_CPU_SRSO
+/*
+ * GNU ld cannot do XOR until 2.41.
+ * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1
+ *
+ * LLVM lld cannot do XOR until lld-17.
+ * https://github.com/llvm/llvm-project/commit/fae96104d4378166cbe5c875ef8ed808a356f3fb
+ *
+ * Instead do: (A | B) - (A & B) in order to compute the XOR
+ * of the two function addresses:
+ */
+. = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) -
+               (ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
+               "SRSO function pair won't alias");
 #endif
 
 #endif /* CONFIG_X86_64 */
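
The (A | B) - (A & B) workaround is exact: A | B is the union of the disjoint bit sets A ^ B and A & B, so subtracting A & B always leaves A ^ B. A short self-check with arbitrary example values:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t a = 0xffffffff82c00000ULL;	/* arbitrary example values */
		uint64_t b = 0xffffffff82d04104ULL;

		assert(((a | b) - (a & b)) == (a ^ b));
		return 0;
	}
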
index 7f4d133..d343268 100644 (file)
@@ -729,6 +729,9 @@ void kvm_set_cpu_caps(void)
                F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */
        );
 
+       if (cpu_feature_enabled(X86_FEATURE_SRSO_NO))
+               kvm_cpu_cap_set(X86_FEATURE_SRSO_NO);
+
        kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
                F(PERFMON_V2)
        );
index ec169f5..7b52e31 100644 (file)
@@ -278,16 +278,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void)
        return kvm_x86_ops.flush_remote_tlbs_range;
 }
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-                                gfn_t nr_pages)
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
 {
-       int ret = -EOPNOTSUPP;
+       if (!kvm_x86_ops.flush_remote_tlbs_range)
+               return -EOPNOTSUPP;
 
-       if (kvm_x86_ops.flush_remote_tlbs_range)
-               ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
-                                                                  nr_pages);
-       if (ret)
-               kvm_flush_remote_tlbs(kvm);
+       return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
 }
 
 static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@@ -1588,7 +1584,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
        for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
                                 range->start, range->end - 1, &iterator)
                ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
-                              iterator.level, range->pte);
+                              iterator.level, range->arg.pte);
 
        return ret;
 }
@@ -6670,7 +6666,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
         */
        if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
                            PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
-               kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+               kvm_flush_remote_tlbs_memslot(kvm, slot);
 }
 
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
@@ -6689,20 +6685,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
        }
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       /*
-        * All current use cases for flushing the TLBs for a specific memslot
-        * related to dirty logging, and many do the TLB flush out of mmu_lock.
-        * The interaction between the various operations on memslot must be
-        * serialized by slots_locks to ensure the TLB flush from one operation
-        * is observed by any other operation on the same memslot.
-        */
-       lockdep_assert_held(&kvm->slots_lock);
-       kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
-}
-
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot)
 {
index d39af56..86cb83b 100644 (file)
@@ -170,9 +170,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
                                    struct kvm_memory_slot *slot, u64 gfn,
                                    int min_level);
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-                                gfn_t nr_pages);
-
 /* Flush the given page (huge or not) of guest memory. */
 static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
 {
index 512163d..6250bd3 100644 (file)
@@ -1241,7 +1241,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
        u64 new_spte;
 
        /* Huge pages aren't expected to be modified without first being zapped. */
-       WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
+       WARN_ON(pte_huge(range->arg.pte) || range->start + 1 != range->end);
 
        if (iter->level != PG_LEVEL_4K ||
            !is_shadow_present_pte(iter->old_spte))
@@ -1255,9 +1255,9 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
         */
        tdp_mmu_iter_set_spte(kvm, iter, 0);
 
-       if (!pte_write(range->pte)) {
+       if (!pte_write(range->arg.pte)) {
                new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
-                                                                 pte_pfn(range->pte));
+                                                                 pte_pfn(range->arg.pte));
 
                tdp_mmu_iter_set_spte(kvm, iter, new_spte);
        }
index 07756b7..d3aec1f 100644 (file)
@@ -2417,15 +2417,18 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
         */
        memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
 
-       vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
+       BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap));
+       memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap));
 
-       svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb);
 
-       if (ghcb_xcr0_is_valid(ghcb)) {
+       svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb);
+
+       if (kvm_ghcb_xcr0_is_valid(svm)) {
                vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
                kvm_update_cpuid_runtime(vcpu);
        }
@@ -2436,84 +2439,88 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
        control->exit_code_hi = upper_32_bits(exit_code);
        control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
        control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
+       svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb);
 
        /* Clear the valid entries fields */
        memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
+static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+{
+       return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 {
-       struct kvm_vcpu *vcpu;
-       struct ghcb *ghcb;
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
        u64 exit_code;
        u64 reason;
 
-       ghcb = svm->sev_es.ghcb;
-
        /*
         * Retrieve the exit code now even though it may not be marked valid
         * as it could help with debugging.
         */
-       exit_code = ghcb_get_sw_exit_code(ghcb);
+       exit_code = kvm_ghcb_get_sw_exit_code(control);
 
        /* Only GHCB Usage code 0 is supported */
-       if (ghcb->ghcb_usage) {
+       if (svm->sev_es.ghcb->ghcb_usage) {
                reason = GHCB_ERR_INVALID_USAGE;
                goto vmgexit_err;
        }
 
        reason = GHCB_ERR_MISSING_INPUT;
 
-       if (!ghcb_sw_exit_code_is_valid(ghcb) ||
-           !ghcb_sw_exit_info_1_is_valid(ghcb) ||
-           !ghcb_sw_exit_info_2_is_valid(ghcb))
+       if (!kvm_ghcb_sw_exit_code_is_valid(svm) ||
+           !kvm_ghcb_sw_exit_info_1_is_valid(svm) ||
+           !kvm_ghcb_sw_exit_info_2_is_valid(svm))
                goto vmgexit_err;
 
-       switch (ghcb_get_sw_exit_code(ghcb)) {
+       switch (exit_code) {
        case SVM_EXIT_READ_DR7:
                break;
        case SVM_EXIT_WRITE_DR7:
-               if (!ghcb_rax_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_RDTSC:
                break;
        case SVM_EXIT_RDPMC:
-               if (!ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_CPUID:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
-               if (ghcb_get_rax(ghcb) == 0xd)
-                       if (!ghcb_xcr0_is_valid(ghcb))
+               if (vcpu->arch.regs[VCPU_REGS_RAX] == 0xd)
+                       if (!kvm_ghcb_xcr0_is_valid(svm))
                                goto vmgexit_err;
                break;
        case SVM_EXIT_INVD:
                break;
        case SVM_EXIT_IOIO:
-               if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
-                       if (!ghcb_sw_scratch_is_valid(ghcb))
+               if (control->exit_info_1 & SVM_IOIO_STR_MASK) {
+                       if (!kvm_ghcb_sw_scratch_is_valid(svm))
                                goto vmgexit_err;
                } else {
-                       if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
-                               if (!ghcb_rax_is_valid(ghcb))
+                       if (!(control->exit_info_1 & SVM_IOIO_TYPE_MASK))
+                               if (!kvm_ghcb_rax_is_valid(svm))
                                        goto vmgexit_err;
                }
                break;
        case SVM_EXIT_MSR:
-               if (!ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
-               if (ghcb_get_sw_exit_info_1(ghcb)) {
-                       if (!ghcb_rax_is_valid(ghcb) ||
-                           !ghcb_rdx_is_valid(ghcb))
+               if (control->exit_info_1) {
+                       if (!kvm_ghcb_rax_is_valid(svm) ||
+                           !kvm_ghcb_rdx_is_valid(svm))
                                goto vmgexit_err;
                }
                break;
        case SVM_EXIT_VMMCALL:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_cpl_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_cpl_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_RDTSCP:
@@ -2521,19 +2528,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
        case SVM_EXIT_WBINVD:
                break;
        case SVM_EXIT_MONITOR:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_rcx_is_valid(ghcb) ||
-                   !ghcb_rdx_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_rcx_is_valid(svm) ||
+                   !kvm_ghcb_rdx_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_MWAIT:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_VMGEXIT_MMIO_READ:
        case SVM_VMGEXIT_MMIO_WRITE:
-               if (!ghcb_sw_scratch_is_valid(ghcb))
+               if (!kvm_ghcb_sw_scratch_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_VMGEXIT_NMI_COMPLETE:
@@ -2549,11 +2556,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
        return 0;
 
 vmgexit_err:
-       vcpu = &svm->vcpu;
-
        if (reason == GHCB_ERR_INVALID_USAGE) {
                vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
-                           ghcb->ghcb_usage);
+                           svm->sev_es.ghcb->ghcb_usage);
        } else if (reason == GHCB_ERR_INVALID_EVENT) {
                vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
                            exit_code);
@@ -2563,11 +2568,8 @@ vmgexit_err:
                dump_ghcb(svm);
        }
 
-       /* Clear the valid entries fields */
-       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
-
-       ghcb_set_sw_exit_info_1(ghcb, 2);
-       ghcb_set_sw_exit_info_2(ghcb, reason);
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, reason);
 
        /* Resume the guest to "return" the error code. */
        return 1;
@@ -2586,7 +2588,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
                 */
                if (svm->sev_es.ghcb_sa_sync) {
                        kvm_write_guest(svm->vcpu.kvm,
-                                       ghcb_get_sw_scratch(svm->sev_es.ghcb),
+                                       svm->sev_es.sw_scratch,
                                        svm->sev_es.ghcb_sa,
                                        svm->sev_es.ghcb_sa_len);
                        svm->sev_es.ghcb_sa_sync = false;
@@ -2632,12 +2634,11 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
 static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
 {
        struct vmcb_control_area *control = &svm->vmcb->control;
-       struct ghcb *ghcb = svm->sev_es.ghcb;
        u64 ghcb_scratch_beg, ghcb_scratch_end;
        u64 scratch_gpa_beg, scratch_gpa_end;
        void *scratch_va;
 
-       scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+       scratch_gpa_beg = svm->sev_es.sw_scratch;
        if (!scratch_gpa_beg) {
                pr_err("vmgexit: scratch gpa not provided\n");
                goto e_scratch;
@@ -2708,8 +2709,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
        return 0;
 
 e_scratch:
-       ghcb_set_sw_exit_info_1(ghcb, 2);
-       ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
 
        return 1;
 }
@@ -2822,7 +2823,6 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb_control_area *control = &svm->vmcb->control;
        u64 ghcb_gpa, exit_code;
-       struct ghcb *ghcb;
        int ret;
 
        /* Validate the GHCB */
@@ -2847,20 +2847,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
        }
 
        svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
-       ghcb = svm->sev_es.ghcb_map.hva;
 
-       trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
-
-       exit_code = ghcb_get_sw_exit_code(ghcb);
+       trace_kvm_vmgexit_enter(vcpu->vcpu_id, svm->sev_es.ghcb);
 
+       sev_es_sync_from_ghcb(svm);
        ret = sev_es_validate_vmgexit(svm);
        if (ret)
                return ret;
 
-       sev_es_sync_from_ghcb(svm);
-       ghcb_set_sw_exit_info_1(ghcb, 0);
-       ghcb_set_sw_exit_info_2(ghcb, 0);
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 0);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 0);
 
+       exit_code = kvm_ghcb_get_sw_exit_code(control);
        switch (exit_code) {
        case SVM_VMGEXIT_MMIO_READ:
                ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
@@ -2898,13 +2896,13 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
                        break;
                case 1:
                        /* Get AP jump table address */
-                       ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+                       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, sev->ap_jump_table);
                        break;
                default:
                        pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
                               control->exit_info_1);
-                       ghcb_set_sw_exit_info_1(ghcb, 2);
-                       ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
+                       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+                       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
                }
 
                ret = 1;
index 956726d..d4bfdc6 100644 (file)
@@ -1498,7 +1498,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        if (sd->current_vmcb != svm->vmcb) {
                sd->current_vmcb = svm->vmcb;
-               indirect_branch_prediction_barrier();
+
+               if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT))
+                       indirect_branch_prediction_barrier();
        }
        if (kvm_vcpu_apicv_active(vcpu))
                avic_vcpu_load(vcpu, cpu);
@@ -4004,6 +4006,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
 
        guest_state_enter_irqoff();
 
+       amd_clear_divider();
+
        if (sev_es_guest(vcpu->kvm))
                __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
        else
index 18af7e7..8239c8d 100644 (file)
@@ -190,10 +190,12 @@ struct vcpu_sev_es_state {
        /* SEV-ES support */
        struct sev_es_save_area *vmsa;
        struct ghcb *ghcb;
+       u8 valid_bitmap[16];
        struct kvm_host_map ghcb_map;
        bool received_first_sipi;
 
        /* SEV-ES scratch area support */
+       u64 sw_scratch;
        void *ghcb_sa;
        u32 ghcb_sa_len;
        bool ghcb_sa_sync;
@@ -744,4 +746,28 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
 void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
 
+#define DEFINE_KVM_GHCB_ACCESSORS(field)                                               \
+       static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \
+       {                                                                       \
+               return test_bit(GHCB_BITMAP_IDX(field),                         \
+                               (unsigned long *)&svm->sev_es.valid_bitmap);    \
+       }                                                                       \
+                                                                               \
+       static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm, struct ghcb *ghcb) \
+       {                                                                       \
+               return kvm_ghcb_##field##_is_valid(svm) ? ghcb->save.field : 0; \
+       }                                                                       \
+
+DEFINE_KVM_GHCB_ACCESSORS(cpl)
+DEFINE_KVM_GHCB_ACCESSORS(rax)
+DEFINE_KVM_GHCB_ACCESSORS(rcx)
+DEFINE_KVM_GHCB_ACCESSORS(rdx)
+DEFINE_KVM_GHCB_ACCESSORS(rbx)
+DEFINE_KVM_GHCB_ACCESSORS(rsi)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_code)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2)
+DEFINE_KVM_GHCB_ACCESSORS(sw_scratch)
+DEFINE_KVM_GHCB_ACCESSORS(xcr0)
+
 #endif
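
For readability, each DEFINE_KVM_GHCB_ACCESSORS(field) line above generates a pair of helpers; expanded by hand for rax (within the kernel's context, not a standalone snippet) it reads roughly:

	static __always_inline bool kvm_ghcb_rax_is_valid(const struct vcpu_svm *svm)
	{
		return test_bit(GHCB_BITMAP_IDX(rax),
				(unsigned long *)&svm->sev_es.valid_bitmap);
	}

	static __always_inline u64 kvm_ghcb_get_rax_if_valid(struct vcpu_svm *svm, struct ghcb *ghcb)
	{
		return kvm_ghcb_rax_is_valid(svm) ? ghcb->save.rax : 0;
	}
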
index 8e8295e..ef2ebab 100644 (file)
@@ -222,7 +222,7 @@ SYM_FUNC_START(__svm_vcpu_run)
         * because interrupt handlers won't sanitize 'ret' if the return is
         * from the kernel.
         */
-       UNTRAIN_RET
+       UNTRAIN_RET_VM
 
        /*
         * Clear all general purpose registers except RSP and RAX to prevent
@@ -359,7 +359,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
         * because interrupt handlers won't sanitize RET if the return is
         * from the kernel.
         */
-       UNTRAIN_RET
+       UNTRAIN_RET_VM
 
        /* "Pop" @spec_ctrl_intercepted.  */
        pop %_ASM_BX
index 278dbd3..94fa36e 100644 (file)
@@ -1616,7 +1616,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
         ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
         ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
         ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
-        ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+        ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
 
 static u64 kvm_get_arch_capabilities(void)
 {
@@ -1673,6 +1673,9 @@ static u64 kvm_get_arch_capabilities(void)
                 */
        }
 
+       if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated())
+               data |= ARCH_CAP_GDS_NO;
+
        return data;
 }
 
@@ -11795,15 +11798,22 @@ static int sync_regs(struct kvm_vcpu *vcpu)
                __set_regs(vcpu, &vcpu->run->s.regs.regs);
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
        }
+
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
-               if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
+               struct kvm_sregs sregs = vcpu->run->s.regs.sregs;
+
+               if (__set_sregs(vcpu, &sregs))
                        return -EINVAL;
+
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
        }
+
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
-               if (kvm_vcpu_ioctl_x86_set_vcpu_events(
-                               vcpu, &vcpu->run->s.regs.events))
+               struct kvm_vcpu_events events = vcpu->run->s.regs.events;
+
+               if (kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events))
                        return -EINVAL;
+
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
        }
 
@@ -12769,7 +12779,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
                 * See is_writable_pte() for more details (the case involving
                 * access-tracked SPTEs is particularly relevant).
                 */
-               kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+               kvm_flush_remote_tlbs_memslot(kvm, new);
        }
 }
 
index 3fd066d..cd86aeb 100644 (file)
@@ -11,8 +11,9 @@
 #include <asm/unwind_hints.h>
 #include <asm/percpu.h>
 #include <asm/frame.h>
+#include <asm/nops.h>
 
-       .section .text.__x86.indirect_thunk
+       .section .text..__x86.indirect_thunk
 
 
 .macro POLINE reg
@@ -131,36 +132,107 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
  */
 #ifdef CONFIG_RETHUNK
 
-       .section .text.__x86.return_thunk
+/*
+ * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
+ * special addresses:
+ *
+ * - srso_alias_untrain_ret() is 2M aligned
+ * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
+ * and 20 in its virtual address are set (while those bits in the
+ * srso_alias_untrain_ret() function are cleared).
+ *
+ * This guarantees that those two addresses will alias in the branch
+ * target buffer of Zen3/4 generations, so that any potentially
+ * poisoned entries at that BTB slot get evicted.
+ *
+ * As a result, srso_alias_safe_ret() becomes a safe return.
+ */
+#ifdef CONFIG_CPU_SRSO
+       .section .text..__x86.rethunk_untrain
+
+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       UNWIND_HINT_FUNC
+       ANNOTATE_NOENDBR
+       ASM_NOP2
+       lfence
+       jmp srso_alias_return_thunk
+SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
+
+       .section .text..__x86.rethunk_safe
+#else
+/* dummy definition for alternatives */
+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       ANNOTATE_UNRET_SAFE
+       ret
+       int3
+SYM_FUNC_END(srso_alias_untrain_ret)
+#endif
+
+SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       lea 8(%_ASM_SP), %_ASM_SP
+       UNWIND_HINT_FUNC
+       ANNOTATE_UNRET_SAFE
+       ret
+       int3
+SYM_FUNC_END(srso_alias_safe_ret)
+
+       .section .text..__x86.return_thunk
+
+SYM_CODE_START(srso_alias_return_thunk)
+       UNWIND_HINT_FUNC
+       ANNOTATE_NOENDBR
+       call srso_alias_safe_ret
+       ud2
+SYM_CODE_END(srso_alias_return_thunk)
+
+/*
+ * Some generic notes on the untraining sequences:
+ *
+ * They are interchangeable when it comes to flushing potentially wrong
+ * RET predictions from the BTB.
+ *
+ * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
+ * Retbleed sequence because the return sequence done there
+ * (srso_safe_ret()) is longer and the return sequence must fully nest
+ * (end before) the untraining sequence. Therefore, the untraining
+ * sequence must fully overlap the return sequence.
+ *
+ * Regarding alignment - the instructions which need to be untrained
+ * must all start at a cacheline boundary for Zen1/2 generations. That
+ * is, instruction sequences starting at srso_safe_ret() and
+ * the respective instruction sequences at retbleed_return_thunk()
+ * must start at a cacheline boundary.
+ */
 
 /*
  * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
- * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
  *    alignment within the BTB.
- * 2) The instruction at zen_untrain_ret must contain, and not
+ * 2) The instruction at retbleed_untrain_ret must contain, and not
  *    end with, the 0xc3 byte of the RET.
  * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisoning the BTB prediction.
  */
        .align 64
-       .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc
-SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
+SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
        ANNOTATE_NOENDBR
        /*
-        * As executed from zen_untrain_ret, this is:
+        * As executed from retbleed_untrain_ret, this is:
         *
         *   TEST $0xcc, %bl
         *   LFENCE
-        *   JMP __x86_return_thunk
+        *   JMP retbleed_return_thunk
         *
         * Executing the TEST instruction has a side effect of evicting any BTB
         * prediction (potentially attacker controlled) attached to the RET, as
-        * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+        * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
         */
        .byte   0xf6
 
        /*
-        * As executed from __x86_return_thunk, this is a plain RET.
+        * As executed from retbleed_return_thunk, this is a plain RET.
         *
         * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
         *
@@ -172,13 +244,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
         * With SMT enabled and STIBP active, a sibling thread cannot poison
         * RET's prediction to a type of its choice, but can evict the
         * prediction due to competitive sharing. If the prediction is
-        * evicted, __x86_return_thunk will suffer Straight Line Speculation
+        * evicted, retbleed_return_thunk will suffer Straight Line Speculation
         * which will be contained safely by the INT3.
         */
-SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
        ret
        int3
-SYM_CODE_END(__x86_return_thunk)
+SYM_CODE_END(retbleed_return_thunk)
 
        /*
         * Ensure the TEST decoding / BTB invalidation is complete.
@@ -189,11 +261,67 @@ SYM_CODE_END(__x86_return_thunk)
         * Jump back and execute the RET in the middle of the TEST instruction.
         * INT3 is for SLS protection.
         */
-       jmp __x86_return_thunk
+       jmp retbleed_return_thunk
        int3
-SYM_FUNC_END(zen_untrain_ret)
-__EXPORT_THUNK(zen_untrain_ret)
+SYM_FUNC_END(retbleed_untrain_ret)
+__EXPORT_THUNK(retbleed_untrain_ret)
 
+/*
+ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
+ * above. On kernel entry, srso_untrain_ret() is executed which is a
+ *
+ * movabs $0xccccc30824648d48,%rax
+ *
+ * and when the return thunk executes the inner label srso_safe_ret()
+ * later, it is a stack manipulation and a RET which is mispredicted and
+ * thus a "safe" one to use.
+ */
+       .align 64
+       .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
+SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       ANNOTATE_NOENDBR
+       .byte 0x48, 0xb8
+
+/*
+ * This forces the function return instruction to speculate into a trap
+ * (UD2 in srso_return_thunk() below).  This RET will then mispredict
+ * and execution will continue at the return site read from the top of
+ * the stack.
+ */
+SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+       lea 8(%_ASM_SP), %_ASM_SP
+       ret
+       int3
+       int3
+       /* end of movabs */
+       lfence
+       call srso_safe_ret
+       ud2
+SYM_CODE_END(srso_safe_ret)
+SYM_FUNC_END(srso_untrain_ret)
+__EXPORT_THUNK(srso_untrain_ret)
+
+SYM_CODE_START(srso_return_thunk)
+       UNWIND_HINT_FUNC
+       ANNOTATE_NOENDBR
+       call srso_safe_ret
+       ud2
+SYM_CODE_END(srso_return_thunk)
+
+SYM_FUNC_START(entry_untrain_ret)
+       ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
+                     "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
+                     "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+SYM_FUNC_END(entry_untrain_ret)
+__EXPORT_THUNK(entry_untrain_ret)
+
+SYM_CODE_START(__x86_return_thunk)
+       UNWIND_HINT_FUNC
+       ANNOTATE_NOENDBR
+       ANNOTATE_UNRET_SAFE
+       ret
+       int3
+SYM_CODE_END(__x86_return_thunk)
 EXPORT_SYMBOL(__x86_return_thunk)
 
 #endif /* CONFIG_RETHUNK */
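
As an illustrative aside (not part of the patch), the placement rule described for srso_alias_untrain_ret()/srso_alias_safe_ret() above — same 2MB region, virtual addresses differing exactly in bits 2, 8, 14 and 20 — can be expressed as the small userspace check below; the two sample addresses and the helper are hypothetical.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Bits that must be clear in srso_alias_untrain_ret() and set in
 * srso_alias_safe_ret() so the two entries collide in the Zen3/4 BTB. */
#define SRSO_ALIAS_BITS ((1ULL << 2) | (1ULL << 8) | (1ULL << 14) | (1ULL << 20))
#define PMD_MASK        (~((1ULL << 21) - 1))   /* 2MB-aligned region */

static bool srso_addrs_alias(uint64_t untrain_ret, uint64_t safe_ret)
{
        if ((untrain_ret & PMD_MASK) != (safe_ret & PMD_MASK))
                return false;   /* must live in the same 2MB page */

        return (untrain_ret & SRSO_ALIAS_BITS) == 0 &&
               (safe_ret & SRSO_ALIAS_BITS) == SRSO_ALIAS_BITS;
}

int main(void)
{
        uint64_t untrain = 0xffffffff82000000ULL;       /* hypothetical, 2MB aligned */
        uint64_t safe    = untrain | SRSO_ALIAS_BITS;

        printf("alias: %s\n", srso_addrs_alias(untrain, safe) ? "yes" : "no");
        return 0;
}

The MOVABS trick for Zen1/2 documented above can be read the same way byte by byte: the immediate 0xccccc30824648d48 that follows the 0x48 0xb8 opcode is, in little-endian memory order, 48 8d 64 24 08 c3 cc cc — exactly the lea 8(%rsp),%rsp / ret / int3 / int3 sequence that executes when control lands on the srso_safe_ret label rather than at srso_untrain_ret.
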
index fc49be6..9faafcd 100644 (file)
@@ -136,7 +136,9 @@ static void blkg_free_workfn(struct work_struct *work)
                        blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
        if (blkg->parent)
                blkg_put(blkg->parent);
+       spin_lock_irq(&q->queue_lock);
        list_del_init(&blkg->q_node);
+       spin_unlock_irq(&q->queue_lock);
        mutex_unlock(&q->blkcg_mutex);
 
        blk_put_queue(q);
index 90de500..9866468 100644 (file)
@@ -722,14 +722,9 @@ void submit_bio_noacct(struct bio *bio)
        struct block_device *bdev = bio->bi_bdev;
        struct request_queue *q = bdev_get_queue(bdev);
        blk_status_t status = BLK_STS_IOERR;
-       struct blk_plug *plug;
 
        might_sleep();
 
-       plug = blk_mq_plug(bio);
-       if (plug && plug->nowait)
-               bio->bi_opf |= REQ_NOWAIT;
-
        /*
         * For a REQ_NOWAIT based request, return -EOPNOTSUPP
         * if queue does not support NOWAIT.
@@ -1059,7 +1054,6 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
        plug->rq_count = 0;
        plug->multiple_queues = false;
        plug->has_elevator = false;
-       plug->nowait = false;
        INIT_LIST_HEAD(&plug->cb_list);
 
        /*
index ad9844c..e6468ea 100644 (file)
@@ -78,7 +78,7 @@ static struct blk_crypto_fallback_keyslot {
        struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX];
 } *blk_crypto_keyslots;
 
-static struct blk_crypto_profile blk_crypto_fallback_profile;
+static struct blk_crypto_profile *blk_crypto_fallback_profile;
 static struct workqueue_struct *blk_crypto_wq;
 static mempool_t *blk_crypto_bounce_page_pool;
 static struct bio_set crypto_bio_split;
@@ -292,7 +292,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
         * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
         * this bio's algorithm and key.
         */
-       blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+       blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
                                        bc->bc_key, &slot);
        if (blk_st != BLK_STS_OK) {
                src_bio->bi_status = blk_st;
@@ -395,7 +395,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
         * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
         * this bio's algorithm and key.
         */
-       blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+       blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
                                        bc->bc_key, &slot);
        if (blk_st != BLK_STS_OK) {
                bio->bi_status = blk_st;
@@ -499,7 +499,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
                return false;
        }
 
-       if (!__blk_crypto_cfg_supported(&blk_crypto_fallback_profile,
+       if (!__blk_crypto_cfg_supported(blk_crypto_fallback_profile,
                                        &bc->bc_key->crypto_cfg)) {
                bio->bi_status = BLK_STS_NOTSUPP;
                return false;
@@ -526,7 +526,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
 
 int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
 {
-       return __blk_crypto_evict_key(&blk_crypto_fallback_profile, key);
+       return __blk_crypto_evict_key(blk_crypto_fallback_profile, key);
 }
 
 static bool blk_crypto_fallback_inited;
@@ -534,7 +534,6 @@ static int blk_crypto_fallback_init(void)
 {
        int i;
        int err;
-       struct blk_crypto_profile *profile = &blk_crypto_fallback_profile;
 
        if (blk_crypto_fallback_inited)
                return 0;
@@ -545,18 +544,27 @@ static int blk_crypto_fallback_init(void)
        if (err)
                goto out;
 
-       err = blk_crypto_profile_init(profile, blk_crypto_num_keyslots);
-       if (err)
+       /* Dynamic allocation is needed because of lockdep_register_key(). */
+       blk_crypto_fallback_profile =
+               kzalloc(sizeof(*blk_crypto_fallback_profile), GFP_KERNEL);
+       if (!blk_crypto_fallback_profile) {
+               err = -ENOMEM;
                goto fail_free_bioset;
+       }
+
+       err = blk_crypto_profile_init(blk_crypto_fallback_profile,
+                                     blk_crypto_num_keyslots);
+       if (err)
+               goto fail_free_profile;
        err = -ENOMEM;
 
-       profile->ll_ops = blk_crypto_fallback_ll_ops;
-       profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
+       blk_crypto_fallback_profile->ll_ops = blk_crypto_fallback_ll_ops;
+       blk_crypto_fallback_profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
 
        /* All blk-crypto modes have a crypto API fallback. */
        for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++)
-               profile->modes_supported[i] = 0xFFFFFFFF;
-       profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
+               blk_crypto_fallback_profile->modes_supported[i] = 0xFFFFFFFF;
+       blk_crypto_fallback_profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
 
        blk_crypto_wq = alloc_workqueue("blk_crypto_wq",
                                        WQ_UNBOUND | WQ_HIGHPRI |
@@ -597,7 +605,9 @@ fail_free_keyslots:
 fail_free_wq:
        destroy_workqueue(blk_crypto_wq);
 fail_destroy_profile:
-       blk_crypto_profile_destroy(profile);
+       blk_crypto_profile_destroy(blk_crypto_fallback_profile);
+fail_free_profile:
+       kfree(blk_crypto_fallback_profile);
 fail_free_bioset:
        bioset_exit(&crypto_bio_split);
 out:
index dd64e20..089fcb9 100644 (file)
@@ -3301,11 +3301,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
        if (qos[QOS_MIN] > qos[QOS_MAX])
                goto einval;
 
-       if (enable) {
+       if (enable && !ioc->enabled) {
                blk_stat_enable_accounting(disk->queue);
                blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
                ioc->enabled = true;
-       } else {
+       } else if (!enable && ioc->enabled) {
+               blk_stat_disable_accounting(disk->queue);
                blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
                ioc->enabled = false;
        }
index b04ff6f..953f083 100644 (file)
@@ -681,6 +681,21 @@ out_queue_exit:
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
+static void blk_mq_finish_request(struct request *rq)
+{
+       struct request_queue *q = rq->q;
+
+       if (rq->rq_flags & RQF_USE_SCHED) {
+               q->elevator->type->ops.finish_request(rq);
+               /*
+                * A postflush request may need to be completed
+                * twice, so clear this flag to avoid calling
+                * finish_request() twice on the same rq.
+                */
+               rq->rq_flags &= ~RQF_USE_SCHED;
+       }
+}
+
 static void __blk_mq_free_request(struct request *rq)
 {
        struct request_queue *q = rq->q;
@@ -707,9 +722,7 @@ void blk_mq_free_request(struct request *rq)
 {
        struct request_queue *q = rq->q;
 
-       if ((rq->rq_flags & RQF_USE_SCHED) &&
-           q->elevator->type->ops.finish_request)
-               q->elevator->type->ops.finish_request(rq);
+       blk_mq_finish_request(rq);
 
        if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
                laptop_io_completion(q->disk->bdi);
@@ -1020,6 +1033,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
        if (blk_mq_need_time_stamp(rq))
                __blk_mq_end_request_acct(rq, ktime_get_ns());
 
+       blk_mq_finish_request(rq);
+
        if (rq->end_io) {
                rq_qos_done(rq->q, rq);
                if (rq->end_io(rq, error) == RQ_END_IO_FREE)
@@ -1074,6 +1089,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
                if (iob->need_ts)
                        __blk_mq_end_request_acct(rq, now);
 
+               blk_mq_finish_request(rq);
+
                rq_qos_done(rq->q, rq);
 
                /*
index 8400e30..5ff093c 100644 (file)
@@ -499,6 +499,9 @@ void elv_unregister_queue(struct request_queue *q)
 
 int elv_register(struct elevator_type *e)
 {
+       /* finish request is mandatory */
+       if (WARN_ON_ONCE(!e->ops.finish_request))
+               return -EINVAL;
        /* insert_requests and dispatch_request are mandatory */
        if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
                return -EINVAL;
index a286bf3..838ffad 100644 (file)
@@ -358,13 +358,14 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
                task_io_account_write(bio->bi_iter.bi_size);
        }
 
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               bio->bi_opf |= REQ_NOWAIT;
+
        if (iocb->ki_flags & IOCB_HIPRI) {
-               bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
+               bio->bi_opf |= REQ_POLLED;
                submit_bio(bio);
                WRITE_ONCE(iocb->private, bio);
        } else {
-               if (iocb->ki_flags & IOCB_NOWAIT)
-                       bio->bi_opf |= REQ_NOWAIT;
                submit_bio(bio);
        }
        return -EIOCBQUEUED;
index 06b15b9..10efb56 100644 (file)
@@ -1241,6 +1241,8 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
                                return -ENOMEM;
                }
 
+               rsgl->sgl.need_unpin =
+                       iov_iter_extract_will_pin(&msg->msg_iter);
                rsgl->sgl.sgt.sgl = rsgl->sgl.sgl;
                rsgl->sgl.sgt.nents = 0;
                rsgl->sgl.sgt.orig_nents = 0;
@@ -1255,8 +1257,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
                }
 
                sg_mark_end(rsgl->sgl.sgt.sgl + rsgl->sgl.sgt.nents - 1);
-               rsgl->sgl.need_unpin =
-                       iov_iter_extract_will_pin(&msg->msg_iter);
 
                /* chain the new scatterlist with previous one */
                if (areq->last_rsgl)
index 52b339a..9967fcf 100644 (file)
@@ -173,6 +173,9 @@ static void internal_free_pages_locked(struct ivpu_bo *bo)
 {
        unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
 
+       if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
+               set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
+
        for (i = 0; i < npages; i++)
                put_page(bo->pages[i]);
 
@@ -587,6 +590,11 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
        if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
                drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
 
+       if (bo->flags & DRM_IVPU_BO_WC)
+               set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT);
+       else if (bo->flags & DRM_IVPU_BO_UNCACHED)
+               set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT);
+
        prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
        bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
        if (!bo->kvaddr) {
index cfbc92d..388abd4 100644 (file)
@@ -392,18 +392,31 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
                                   struct qaic_manage_trans_dma_xfer *in_trans,
                                   struct ioctl_resources *resources, struct dma_xfer *xfer)
 {
+       u64 xfer_start_addr, remaining, end, total;
        unsigned long need_pages;
        struct page **page_list;
        unsigned long nr_pages;
        struct sg_table *sgt;
-       u64 xfer_start_addr;
        int ret;
        int i;
 
-       xfer_start_addr = in_trans->addr + resources->xferred_dma_size;
+       if (check_add_overflow(in_trans->addr, resources->xferred_dma_size, &xfer_start_addr))
+               return -EINVAL;
 
-       need_pages = DIV_ROUND_UP(in_trans->size + offset_in_page(xfer_start_addr) -
-                                 resources->xferred_dma_size, PAGE_SIZE);
+       if (in_trans->size < resources->xferred_dma_size)
+               return -EINVAL;
+       remaining = in_trans->size - resources->xferred_dma_size;
+       if (remaining == 0)
+               return 0;
+
+       if (check_add_overflow(xfer_start_addr, remaining, &end))
+               return -EINVAL;
+
+       total = remaining + offset_in_page(xfer_start_addr);
+       if (total >= SIZE_MAX)
+               return -EINVAL;
+
+       need_pages = DIV_ROUND_UP(total, PAGE_SIZE);
 
        nr_pages = need_pages;
 
@@ -435,7 +448,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
 
        ret = sg_alloc_table_from_pages(sgt, page_list, nr_pages,
                                        offset_in_page(xfer_start_addr),
-                                       in_trans->size - resources->xferred_dma_size, GFP_KERNEL);
+                                       remaining, GFP_KERNEL);
        if (ret) {
                ret = -ENOMEM;
                goto free_sgt;
@@ -566,9 +579,6 @@ static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list
            QAIC_MANAGE_EXT_MSG_LENGTH)
                return -ENOMEM;
 
-       if (in_trans->addr + in_trans->size < in_trans->addr || !in_trans->size)
-               return -EINVAL;
-
        xfer = kmalloc(sizeof(*xfer), GFP_KERNEL);
        if (!xfer)
                return -ENOMEM;
index e9a1cb7..6b6d981 100644 (file)
@@ -1021,6 +1021,7 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
        bo->dbc = dbc;
        srcu_read_unlock(&dbc->ch_lock, rcu_id);
        drm_gem_object_put(obj);
+       kfree(slice_ent);
        srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
        srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
 
index 1dd8d5a..32cfa3f 100644 (file)
@@ -470,6 +470,49 @@ static const struct dmi_system_id asus_laptop[] = {
        { }
 };
 
+static const struct dmi_system_id tongfang_gm_rg[] = {
+       {
+               .ident = "TongFang GMxRGxx/XMG CORE 15 (M22)/TUXEDO Stellaris 15 Gen4 AMD",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"),
+               },
+       },
+       { }
+};
+
+static const struct dmi_system_id maingear_laptop[] = {
+       {
+               .ident = "MAINGEAR Vector Pro 2 15",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-15A3070T"),
+               }
+       },
+       {
+               .ident = "MAINGEAR Vector Pro 2 17",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-17A3070T"),
+               },
+       },
+       { }
+};
+
+static const struct dmi_system_id pcspecialist_laptop[] = {
+       {
+               .ident = "PCSpecialist Elimina Pro 16 M",
+               /*
+                * Some models have product-name "Elimina Pro 16 M",
+                * others "GM6BGEQ". Match on board-name to match both.
+                */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"),
+                       DMI_MATCH(DMI_BOARD_NAME, "GM6BGEQ"),
+               },
+       },
+       { }
+};
+
 static const struct dmi_system_id lg_laptop[] = {
        {
                .ident = "LG Electronics 17U70P",
@@ -493,6 +536,9 @@ struct irq_override_cmp {
 static const struct irq_override_cmp override_table[] = {
        { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
        { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
+       { tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
+       { maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
+       { pcspecialist_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
        { lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
 };
 
@@ -512,6 +558,28 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity,
                        return entry->override;
        }
 
+#ifdef CONFIG_X86
+       /*
+        * Always use the MADT override info, except for the i8042 PS/2 ctrl
+        * IRQs (1 and 12). For these, the DSDT IRQ settings should sometimes
+        * be used instead; otherwise PS/2 keyboards / mice will not work.
+        */
+       if (gsi != 1 && gsi != 12)
+               return true;
+
+       /* If the override comes from an INT_SRC_OVR MADT entry, honor it. */
+       if (acpi_int_src_ovr[gsi])
+               return true;
+
+       /*
+        * IRQ override isn't needed on modern AMD Zen systems and
+        * this override breaks active low IRQs on AMD Ryzen 6000 and
+        * newer systems. Skip it.
+        */
+       if (boot_cpu_has(X86_FEATURE_ZEN))
+               return false;
+#endif
+
        return true;
 }
 
index 5b145f1..87e3855 100644 (file)
@@ -1714,6 +1714,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device)
                {"BSG1160", },
                {"BSG2150", },
                {"CSC3551", },
+               {"CSC3556", },
                {"INT33FE", },
                {"INT3515", },
                /* Non-conforming _HID for Cirrus Logic already released */
index 486c827..d720f93 100644 (file)
@@ -6617,6 +6617,7 @@ err_init_binder_device_failed:
 
 err_alloc_device_names_failed:
        debugfs_remove_recursive(binder_debugfs_dir_entry_root);
+       binder_alloc_shrinker_exit();
 
        return ret;
 }
index 662a2a2..e3db829 100644 (file)
@@ -1087,6 +1087,12 @@ int binder_alloc_shrinker_init(void)
        return ret;
 }
 
+void binder_alloc_shrinker_exit(void)
+{
+       unregister_shrinker(&binder_shrinker);
+       list_lru_destroy(&binder_alloc_lru);
+}
+
 /**
  * check_buffer() - verify that buffer/offset is safe to access
  * @alloc: binder_alloc for this proc
index 138d1d5..dc1e2b0 100644 (file)
@@ -129,6 +129,7 @@ extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
                                                  int pid);
 extern void binder_alloc_init(struct binder_alloc *alloc);
 extern int binder_alloc_shrinker_init(void);
+extern void binder_alloc_shrinker_exit(void);
 extern void binder_alloc_vma_close(struct binder_alloc *alloc);
 extern struct binder_buffer *
 binder_alloc_prepare_to_free(struct binder_alloc *alloc,
index c1815b9..fe6690e 100644 (file)
@@ -509,73 +509,30 @@ static void __init cpu_dev_register_generic(void)
 }
 
 #ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
-
-ssize_t __weak cpu_show_meltdown(struct device *dev,
-                                struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_spectre_v1(struct device *dev,
-                                  struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_spectre_v2(struct device *dev,
-                                  struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
-                                         struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_l1tf(struct device *dev,
-                            struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_mds(struct device *dev,
-                           struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
-                                     struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_srbds(struct device *dev,
+static ssize_t cpu_show_not_affected(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
        return sysfs_emit(buf, "Not affected\n");
 }
 
-ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
-                                       struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_retbleed(struct device *dev,
-                                struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
+#define CPU_SHOW_VULN_FALLBACK(func)                                   \
+       ssize_t cpu_show_##func(struct device *,                        \
+                                 struct device_attribute *, char *)    \
+                __attribute__((weak, alias("cpu_show_not_affected")))
+
+CPU_SHOW_VULN_FALLBACK(meltdown);
+CPU_SHOW_VULN_FALLBACK(spectre_v1);
+CPU_SHOW_VULN_FALLBACK(spectre_v2);
+CPU_SHOW_VULN_FALLBACK(spec_store_bypass);
+CPU_SHOW_VULN_FALLBACK(l1tf);
+CPU_SHOW_VULN_FALLBACK(mds);
+CPU_SHOW_VULN_FALLBACK(tsx_async_abort);
+CPU_SHOW_VULN_FALLBACK(itlb_multihit);
+CPU_SHOW_VULN_FALLBACK(srbds);
+CPU_SHOW_VULN_FALLBACK(mmio_stale_data);
+CPU_SHOW_VULN_FALLBACK(retbleed);
+CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow);
+CPU_SHOW_VULN_FALLBACK(gds);
 
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
@@ -588,6 +545,8 @@ static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
 static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
 static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
 static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
+static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL);
+static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
        &dev_attr_meltdown.attr,
@@ -601,6 +560,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
        &dev_attr_srbds.attr,
        &dev_attr_mmio_stale_data.attr,
        &dev_attr_retbleed.attr,
+       &dev_attr_spec_rstack_overflow.attr,
+       &dev_attr_gather_data_sampling.attr,
        NULL
 };
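
The CPU_SHOW_VULN_FALLBACK() macro above replaces the per-vulnerability __weak stubs with weak aliases of a single cpu_show_not_affected() body. As an illustrative aside (not part of the patch), the underlying GCC weak-alias pattern reduces to the standalone sketch below; show_default() and show_feature() are hypothetical names.

#include <stdio.h>

/* The shared fallback body. */
int show_default(void)
{
        return 0;
}

/*
 * Weak alias: show_feature() resolves to show_default() unless some other
 * object file provides a strong definition, in which case that one wins
 * at link time -- the same trick the sysfs fallback macro relies on.
 */
int show_feature(void) __attribute__((weak, alias("show_default")));

int main(void)
{
        printf("show_feature() = %d\n", show_feature());
        return 0;
}
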
 
index c36d8b1..3988755 100644 (file)
@@ -25,7 +25,7 @@
 
 static struct device *rnbd_dev;
 static const struct class rnbd_dev_class = {
-       .name = "rnbd_client",
+       .name = "rnbd-client",
 };
 static struct kobject *rnbd_devs_kobj;
 
index 5676e6d..06673c6 100644 (file)
@@ -1870,15 +1870,16 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio)
 
 static void zram_bio_read(struct zram *zram, struct bio *bio)
 {
-       struct bvec_iter iter;
-       struct bio_vec bv;
-       unsigned long start_time;
+       unsigned long start_time = bio_start_io_acct(bio);
+       struct bvec_iter iter = bio->bi_iter;
 
-       start_time = bio_start_io_acct(bio);
-       bio_for_each_segment(bv, bio, iter) {
+       do {
                u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
                u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
                                SECTOR_SHIFT;
+               struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+               bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
                if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
                        atomic64_inc(&zram->stats.failed_reads);
@@ -1890,22 +1891,26 @@ static void zram_bio_read(struct zram *zram, struct bio *bio)
                zram_slot_lock(zram, index);
                zram_accessed(zram, index);
                zram_slot_unlock(zram, index);
-       }
+
+               bio_advance_iter_single(bio, &iter, bv.bv_len);
+       } while (iter.bi_size);
+
        bio_end_io_acct(bio, start_time);
        bio_endio(bio);
 }
 
 static void zram_bio_write(struct zram *zram, struct bio *bio)
 {
-       struct bvec_iter iter;
-       struct bio_vec bv;
-       unsigned long start_time;
+       unsigned long start_time = bio_start_io_acct(bio);
+       struct bvec_iter iter = bio->bi_iter;
 
-       start_time = bio_start_io_acct(bio);
-       bio_for_each_segment(bv, bio, iter) {
+       do {
                u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
                u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
                                SECTOR_SHIFT;
+               struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+               bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
                if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
                        atomic64_inc(&zram->stats.failed_writes);
@@ -1916,7 +1921,10 @@ static void zram_bio_write(struct zram *zram, struct bio *bio)
                zram_slot_lock(zram, index);
                zram_accessed(zram, index);
                zram_slot_unlock(zram, index);
-       }
+
+               bio_advance_iter_single(bio, &iter, bv.bv_len);
+       } while (iter.bi_size);
+
        bio_end_io_acct(bio, start_time);
        bio_endio(bio);
 }
index 21fe985..4cb23b9 100644 (file)
@@ -2142,6 +2142,8 @@ static int sysc_reset(struct sysc *ddata)
                sysc_val = sysc_read_sysconfig(ddata);
                sysc_val |= sysc_mask;
                sysc_write(ddata, sysc_offset, sysc_val);
+               /* Flush posted write */
+               sysc_val = sysc_read_sysconfig(ddata);
        }
 
        if (ddata->cfg.srst_udelay)
index cf5499e..ea6b401 100644 (file)
@@ -510,70 +510,6 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip)
        return 0;
 }
 
-/*
- * Some AMD fTPM versions may cause stutter
- * https://www.amd.com/en/support/kb/faq/pa-410
- *
- * Fixes are available in two series of fTPM firmware:
- * 6.x.y.z series: 6.0.18.6 +
- * 3.x.y.z series: 3.57.y.5 +
- */
-#ifdef CONFIG_X86
-static bool tpm_amd_is_rng_defective(struct tpm_chip *chip)
-{
-       u32 val1, val2;
-       u64 version;
-       int ret;
-
-       if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
-               return false;
-
-       ret = tpm_request_locality(chip);
-       if (ret)
-               return false;
-
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val1, NULL);
-       if (ret)
-               goto release;
-       if (val1 != 0x414D4400U /* AMD */) {
-               ret = -ENODEV;
-               goto release;
-       }
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_1, &val1, NULL);
-       if (ret)
-               goto release;
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_2, &val2, NULL);
-
-release:
-       tpm_relinquish_locality(chip);
-
-       if (ret)
-               return false;
-
-       version = ((u64)val1 << 32) | val2;
-       if ((version >> 48) == 6) {
-               if (version >= 0x0006000000180006ULL)
-                       return false;
-       } else if ((version >> 48) == 3) {
-               if (version >= 0x0003005700000005ULL)
-                       return false;
-       } else {
-               return false;
-       }
-
-       dev_warn(&chip->dev,
-                "AMD fTPM version 0x%llx causes system stutter; hwrng disabled\n",
-                version);
-
-       return true;
-}
-#else
-static inline bool tpm_amd_is_rng_defective(struct tpm_chip *chip)
-{
-       return false;
-}
-#endif /* CONFIG_X86 */
-
 static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 {
        struct tpm_chip *chip = container_of(rng, struct tpm_chip, hwrng);
@@ -585,10 +521,20 @@ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
        return tpm_get_random(chip, data, max);
 }
 
+static bool tpm_is_hwrng_enabled(struct tpm_chip *chip)
+{
+       if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM))
+               return false;
+       if (tpm_is_firmware_upgrade(chip))
+               return false;
+       if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED)
+               return false;
+       return true;
+}
+
 static int tpm_add_hwrng(struct tpm_chip *chip)
 {
-       if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip) ||
-           tpm_amd_is_rng_defective(chip))
+       if (!tpm_is_hwrng_enabled(chip))
                return 0;
 
        snprintf(chip->hwrng_name, sizeof(chip->hwrng_name),
@@ -693,7 +639,7 @@ int tpm_chip_register(struct tpm_chip *chip)
        return 0;
 
 out_hwrng:
-       if (IS_ENABLED(CONFIG_HW_RANDOM_TPM) && !tpm_is_firmware_upgrade(chip))
+       if (tpm_is_hwrng_enabled(chip))
                hwrng_unregister(&chip->hwrng);
 out_ppi:
        tpm_bios_log_teardown(chip);
@@ -718,8 +664,7 @@ EXPORT_SYMBOL_GPL(tpm_chip_register);
 void tpm_chip_unregister(struct tpm_chip *chip)
 {
        tpm_del_legacy_sysfs(chip);
-       if (IS_ENABLED(CONFIG_HW_RANDOM_TPM) && !tpm_is_firmware_upgrade(chip) &&
-           !tpm_amd_is_rng_defective(chip))
+       if (tpm_is_hwrng_enabled(chip))
                hwrng_unregister(&chip->hwrng);
        tpm_bios_log_teardown(chip);
        if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip))
index 1a5d09b..9eb1a18 100644 (file)
@@ -463,6 +463,28 @@ static bool crb_req_canceled(struct tpm_chip *chip, u8 status)
        return (cancel & CRB_CANCEL_INVOKE) == CRB_CANCEL_INVOKE;
 }
 
+static int crb_check_flags(struct tpm_chip *chip)
+{
+       u32 val;
+       int ret;
+
+       ret = crb_request_locality(chip, 0);
+       if (ret)
+               return ret;
+
+       ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val, NULL);
+       if (ret)
+               goto release;
+
+       if (val == 0x414D4400U /* AMD */)
+               chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
+
+release:
+       crb_relinquish_locality(chip, 0);
+
+       return ret;
+}
+
 static const struct tpm_class_ops tpm_crb = {
        .flags = TPM_OPS_AUTO_STARTUP,
        .status = crb_status,
@@ -800,6 +822,14 @@ static int crb_acpi_add(struct acpi_device *device)
        chip->acpi_dev_handle = device->handle;
        chip->flags = TPM_CHIP_FLAG_TPM2;
 
+       rc = tpm_chip_bootstrap(chip);
+       if (rc)
+               goto out;
+
+       rc = crb_check_flags(chip);
+       if (rc)
+               goto out;
+
        rc = tpm_chip_register(chip);
 
 out:
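
One small decoding note on crb_check_flags() above, as an illustrative aside (not part of the patch): the manufacturer property is a packed four-character vendor code, so the 0x414D4400 constant it tests is simply the ASCII bytes 'A' 'M' 'D' '\0', as the sketch below shows.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t manufacturer = 0x414D4400U;    /* the constant tested above */
        char vendor[5] = {
                (manufacturer >> 24) & 0xff, (manufacturer >> 16) & 0xff,
                (manufacturer >> 8)  & 0xff,  manufacturer        & 0xff,
        };

        printf("%s\n", vendor); /* prints: AMD */
        return 0;
}
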
index cc42cf3..7fa3d91 100644 (file)
@@ -89,7 +89,7 @@ static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
        tpm_tis_flush(iobase);
 }
 
-static int interrupts = -1;
+static int interrupts;
 module_param(interrupts, int, 0444);
 MODULE_PARM_DESC(interrupts, "Enable interrupts");
 
@@ -164,10 +164,26 @@ static const struct dmi_system_id tpm_tis_dmi_table[] = {
        },
        {
                .callback = tpm_tis_disable_irq,
+               .ident = "ThinkStation P620",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkStation P620"),
+               },
+       },
+       {
+               .callback = tpm_tis_disable_irq,
+               .ident = "TUXEDO InfinityBook S 15/17 Gen7",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "TUXEDO InfinityBook S 15/17 Gen7"),
+               },
+       },
+       {
+               .callback = tpm_tis_disable_irq,
                .ident = "UPX-TGL",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
-                       DMI_MATCH(DMI_PRODUCT_VERSION, "UPX-TGL"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "UPX-TGL01"),
                },
        },
        {}
index 4fb4fd4..737aa70 100644 (file)
@@ -205,18 +205,19 @@ EXPORT_SYMBOL(devm_clk_put);
 struct clk *devm_get_clk_from_child(struct device *dev,
                                    struct device_node *np, const char *con_id)
 {
-       struct clk **ptr, *clk;
+       struct devm_clk_state *state;
+       struct clk *clk;
 
-       ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
-       if (!ptr)
+       state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL);
+       if (!state)
                return ERR_PTR(-ENOMEM);
 
        clk = of_clk_get_by_name(np, con_id);
        if (!IS_ERR(clk)) {
-               *ptr = clk;
-               devres_add(dev, ptr);
+               state->clk = clk;
+               devres_add(dev, state);
        } else {
-               devres_free(ptr);
+               devres_free(state);
        }
 
        return clk;
index d33f741..935d9a2 100644 (file)
@@ -151,8 +151,10 @@ static int ti_syscon_gate_clk_probe(struct platform_device *pdev)
                                 data[i].name);
        }
 
-       return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
-                                          hw_data);
+       if (num_clks == 1)
+               return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get,
+                                                  hw_data->hws[0]);
+       return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, hw_data);
 }
 
 #define TI_SYSCON_CLK_GATE(_name, _offset, _bit_idx)   \
index bca21df..62962ae 100644 (file)
@@ -3,13 +3,6 @@
 # Counter devices
 #
 
-menuconfig COUNTER
-       tristate "Counter support"
-       help
-         This enables counter device support through the Generic Counter
-         interface. You only need to enable this, if you also want to enable
-         one or more of the counter device drivers below.
-
 config I8254
        tristate
        select COUNTER
@@ -25,6 +18,13 @@ config I8254
 
          If built as a module its name will be i8254.
 
+menuconfig COUNTER
+       tristate "Counter support"
+       help
+         This enables counter device support through the Generic Counter
+         interface. You only need to enable this, if you also want to enable
+         one or more of the counter device drivers below.
+
 if COUNTER
 
 config 104_QUAD_8
index 81fba0d..9a1e194 100644 (file)
@@ -1012,8 +1012,8 @@ static int amd_pstate_update_status(const char *buf, size_t size)
        return 0;
 }
 
-static ssize_t show_status(struct kobject *kobj,
-                          struct kobj_attribute *attr, char *buf)
+static ssize_t status_show(struct device *dev,
+                          struct device_attribute *attr, char *buf)
 {
        ssize_t ret;
 
@@ -1024,7 +1024,7 @@ static ssize_t show_status(struct kobject *kobj,
        return ret;
 }
 
-static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
+static ssize_t status_store(struct device *a, struct device_attribute *b,
                            const char *buf, size_t count)
 {
        char *p = memchr(buf, '\n', count);
@@ -1043,7 +1043,7 @@ cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
 cpufreq_freq_attr_ro(amd_pstate_highest_perf);
 cpufreq_freq_attr_rw(energy_performance_preference);
 cpufreq_freq_attr_ro(energy_performance_available_preferences);
-define_one_global_rw(status);
+static DEVICE_ATTR_RW(status);
 
 static struct freq_attr *amd_pstate_attr[] = {
        &amd_pstate_max_freq,
@@ -1062,7 +1062,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
 };
 
 static struct attribute *pstate_global_attributes[] = {
-       &status.attr,
+       &dev_attr_status.attr,
        NULL
 };
 
index c2d6d9c..b88af12 100644 (file)
@@ -120,20 +120,6 @@ static void psci_pd_remove(void)
        }
 }
 
-static bool psci_pd_try_set_osi_mode(void)
-{
-       int ret;
-
-       if (!psci_has_osi_support())
-               return false;
-
-       ret = psci_set_osi_mode(true);
-       if (ret)
-               return false;
-
-       return true;
-}
-
 static void psci_cpuidle_domain_sync_state(struct device *dev)
 {
        /*
@@ -152,15 +138,12 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct device_node *node;
-       bool use_osi;
+       bool use_osi = psci_has_osi_support();
        int ret = 0, pd_count = 0;
 
        if (!np)
                return -ENODEV;
 
-       /* If OSI mode is supported, let's try to enable it. */
-       use_osi = psci_pd_try_set_osi_mode();
-
        /*
         * Parse child nodes for the "#power-domain-cells" property and
         * initialize a genpd/genpd-of-provider pair when it's found.
@@ -170,33 +153,37 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev)
                        continue;
 
                ret = psci_pd_init(node, use_osi);
-               if (ret)
-                       goto put_node;
+               if (ret) {
+                       of_node_put(node);
+                       goto exit;
+               }
 
                pd_count++;
        }
 
        /* Bail out if not using the hierarchical CPU topology. */
        if (!pd_count)
-               goto no_pd;
+               return 0;
 
        /* Link genpd masters/subdomains to model the CPU topology. */
        ret = dt_idle_pd_init_topology(np);
        if (ret)
                goto remove_pd;
 
+       /* let's try to enable OSI. */
+       ret = psci_set_osi_mode(use_osi);
+       if (ret)
+               goto remove_pd;
+
        pr_info("Initialized CPU PM domain topology using %s mode\n",
                use_osi ? "OSI" : "PC");
        return 0;
 
-put_node:
-       of_node_put(node);
 remove_pd:
+       dt_idle_pd_remove_topology(np);
        psci_pd_remove();
+exit:
        pr_err("failed to create CPU PM domains ret=%d\n", ret);
-no_pd:
-       if (use_osi)
-               psci_set_osi_mode(false);
        return ret;
 }
 
index b371655..1af63c1 100644 (file)
@@ -152,6 +152,30 @@ int dt_idle_pd_init_topology(struct device_node *np)
        return 0;
 }
 
+int dt_idle_pd_remove_topology(struct device_node *np)
+{
+       struct device_node *node;
+       struct of_phandle_args child, parent;
+       int ret;
+
+       for_each_child_of_node(np, node) {
+               if (of_parse_phandle_with_args(node, "power-domains",
+                                       "#power-domain-cells", 0, &parent))
+                       continue;
+
+               child.np = node;
+               child.args_count = 0;
+               ret = of_genpd_remove_subdomain(&parent, &child);
+               of_node_put(parent.np);
+               if (ret) {
+                       of_node_put(node);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 struct device *dt_idle_attach_cpu(int cpu, const char *name)
 {
        struct device *dev;
index a95483d..3be1f70 100644 (file)
@@ -14,6 +14,8 @@ struct generic_pm_domain *dt_idle_pd_alloc(struct device_node *np,
 
 int dt_idle_pd_init_topology(struct device_node *np);
 
+int dt_idle_pd_remove_topology(struct device_node *np);
+
 struct device *dt_idle_attach_cpu(int cpu, const char *name);
 
 void dt_idle_detach_cpu(struct device *dev);
@@ -36,6 +38,11 @@ static inline int dt_idle_pd_init_topology(struct device_node *np)
        return 0;
 }
 
+static inline int dt_idle_pd_remove_topology(struct device_node *np)
+{
+       return 0;
+}
+
 static inline struct device *dt_idle_attach_cpu(int cpu, const char *name)
 {
        return NULL;
index ff9ddbb..68e7377 100644 (file)
@@ -382,8 +382,8 @@ static void kick_trng(struct device *dev, int ent_delay)
                val = ent_delay;
                /* min. freq. count, equal to 1/4 of the entropy sample length */
                wr_reg32(&r4tst->rtfrqmin, val >> 2);
-               /* max. freq. count, equal to 16 times the entropy sample length */
-               wr_reg32(&r4tst->rtfrqmax, val << 4);
+               /* disable maximum frequency count */
+               wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE);
        }
 
        wr_reg32(&r4tst->rtsdctl, (val << RTSDCTL_ENT_DLY_SHIFT) |
index 63f0aeb..f0a3527 100644 (file)
@@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
  */
 static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 {
+       LIST_HEAD(signalled);
        struct sync_pt *pt, *next;
 
        trace_sync_timeline(obj);
@@ -203,21 +204,20 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
                if (!timeline_fence_signaled(&pt->base))
                        break;
 
-               list_del_init(&pt->link);
+               dma_fence_get(&pt->base);
+
+               list_move_tail(&pt->link, &signalled);
                rb_erase(&pt->node, &obj->pt_tree);
 
-               /*
-                * A signal callback may release the last reference to this
-                * fence, causing it to be freed. That operation has to be
-                * last to avoid a use after free inside this loop, and must
-                * be after we remove the fence from the timeline in order to
-                * prevent deadlocking on timeline->lock inside
-                * timeline_fence_release().
-                */
                dma_fence_signal_locked(&pt->base);
        }
 
        spin_unlock_irq(&obj->lock);
+
+       list_for_each_entry_safe(pt, next, &signalled, link) {
+               list_del_init(&pt->link);
+               dma_fence_put(&pt->base);
+       }
 }
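
The rework above takes an extra fence reference under the lock, parks signalled points on a private list, and only drops the references after the spinlock is released, so a release callback can never free a fence the loop is still walking or recurse on the timeline lock. As an illustrative aside (not part of the patch), the general "collect under the lock, release after unlock" shape reduces to the hypothetical userspace sketch below.

#include <pthread.h>
#include <stdlib.h>

struct item {
        int refs;
        struct item *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *pending;            /* list protected by 'lock' */

static void item_put(struct item *it)
{
        /* Dropping the last reference frees the item and may run release
         * callbacks, so it is deferred until 'lock' has been released. */
        if (--it->refs == 0)
                free(it);
}

static void signal_all(void)
{
        struct item *done = NULL, *it;

        pthread_mutex_lock(&lock);
        while ((it = pending)) {
                pending = it->next;
                it->next = done;        /* move to a private list under the lock */
                done = it;
        }
        pthread_mutex_unlock(&lock);

        while ((it = done)) {           /* drop references only after unlock */
                done = it->next;
                item_put(it);
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct item *it = calloc(1, sizeof(*it));

                it->refs = 1;           /* reference owned by the 'pending' list */
                it->next = pending;
                pending = it;
        }
        signal_all();
        return 0;
}
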
 
 /**
index 644c188..08fdd0e 100644 (file)
@@ -211,6 +211,7 @@ config FSL_DMA
 config FSL_EDMA
        tristate "Freescale eDMA engine support"
        depends on OF
+       depends on HAS_IOMEM
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
@@ -280,6 +281,7 @@ config IMX_SDMA
 
 config INTEL_IDMA64
        tristate "Intel integrated DMA 64-bit support"
+       depends on HAS_IOMEM
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
index 5abbcc6..9a15f0d 100644 (file)
@@ -384,9 +384,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
        wq->threshold = 0;
        wq->priority = 0;
        wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
-       clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
-       clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
-       clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+       wq->flags = 0;
        memset(wq->name, 0, WQ_NAME_SIZE);
        wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
        idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
index ebd8733..9413fad 100644 (file)
@@ -190,7 +190,13 @@ static int mcf_edma_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       chans = pdata->dma_channels;
+       if (!pdata->dma_channels) {
+               dev_info(&pdev->dev, "setting default channel number to 64");
+               chans = 64;
+       } else {
+               chans = pdata->dma_channels;
+       }
+
        len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
        mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
        if (!mcf_edma)
@@ -202,11 +208,6 @@ static int mcf_edma_probe(struct platform_device *pdev)
        mcf_edma->drvdata = &mcf_data;
        mcf_edma->big_endian = 1;
 
-       if (!mcf_edma->n_chans) {
-               dev_info(&pdev->dev, "setting default channel number to 64");
-               mcf_edma->n_chans = 64;
-       }
-
        mutex_init(&mcf_edma->fsl_edma_mutex);
 
        mcf_edma->membase = devm_platform_ioremap_resource(pdev, 0);
index 95a462a..b6e0ac8 100644 (file)
@@ -192,7 +192,7 @@ struct owl_dma_pchan {
 };
 
 /**
- * struct owl_dma_pchan - Wrapper for DMA ENGINE channel
+ * struct owl_dma_vchan - Wrapper for DMA ENGINE channel
  * @vc: wrapped virtual channel
  * @pchan: the physical channel utilized by this channel
  * @txd: active transaction on this channel
index b4731fe..3cf0b38 100644 (file)
@@ -404,6 +404,12 @@ enum desc_status {
         */
        BUSY,
        /*
+        * Pause was called while descriptor was BUSY. Due to hardware
+        * limitations, only termination is possible for descriptors
+        * that have been paused.
+        */
+       PAUSED,
+       /*
         * Sitting on the channel work_list but xfer done
         * by PL330 core
         */
@@ -2041,7 +2047,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
        list_for_each_entry(desc, &pch->work_list, node) {
 
                /* If already submitted */
-               if (desc->status == BUSY)
+               if (desc->status == BUSY || desc->status == PAUSED)
                        continue;
 
                ret = pl330_submit_req(pch->thread, desc);
@@ -2326,6 +2332,7 @@ static int pl330_pause(struct dma_chan *chan)
 {
        struct dma_pl330_chan *pch = to_pchan(chan);
        struct pl330_dmac *pl330 = pch->dmac;
+       struct dma_pl330_desc *desc;
        unsigned long flags;
 
        pm_runtime_get_sync(pl330->ddma.dev);
@@ -2335,6 +2342,10 @@ static int pl330_pause(struct dma_chan *chan)
        _stop(pch->thread);
        spin_unlock(&pl330->lock);
 
+       list_for_each_entry(desc, &pch->work_list, node) {
+               if (desc->status == BUSY)
+                       desc->status = PAUSED;
+       }
        spin_unlock_irqrestore(&pch->lock, flags);
        pm_runtime_mark_last_busy(pl330->ddma.dev);
        pm_runtime_put_autosuspend(pl330->ddma.dev);
@@ -2425,7 +2436,7 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                else if (running && desc == running)
                        transferred =
                                pl330_get_current_xferred_count(pch, desc);
-               else if (desc->status == BUSY)
+               else if (desc->status == BUSY || desc->status == PAUSED)
                        /*
                         * Busy but not running means either just enqueued,
                         * or finished and not yet marked done
@@ -2442,6 +2453,9 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                        case DONE:
                                ret = DMA_COMPLETE;
                                break;
+                       case PAUSED:
+                               ret = DMA_PAUSED;
+                               break;
                        case PREP:
                        case BUSY:
                                ret = DMA_IN_PROGRESS;
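
The new PAUSED state above sits between BUSY and termination: pl330_pause() marks in-flight descriptors PAUSED, fill_queue() refuses to resubmit them, and pl330_tx_status() reports DMA_PAUSED for them. As an illustrative aside (not part of the patch), that small state machine reduces to the hypothetical sketch below.

#include <stdbool.h>
#include <stdio.h>

enum desc_status { PREP, BUSY, PAUSED, DONE };

/* Pause only marks BUSY work as PAUSED; it is never resumed, only
 * terminated, mirroring the hardware limitation described above. */
static enum desc_status on_pause(enum desc_status s)
{
        return s == BUSY ? PAUSED : s;
}

static bool can_submit(enum desc_status s)
{
        /* fill_queue() above skips both BUSY and PAUSED descriptors. */
        return s != BUSY && s != PAUSED;
}

int main(void)
{
        enum desc_status s = BUSY;

        s = on_pause(s);
        printf("paused=%d submit=%d\n", s == PAUSED, can_submit(s));
        return 0;
}
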
index 93ee298..e0bfd12 100644 (file)
@@ -668,6 +668,8 @@ static int xdma_set_vector_reg(struct xdma_device *xdev, u32 vec_tbl_start,
                        val |= irq_start << shift;
                        irq_start++;
                        irq_num--;
+                       if (!irq_num)
+                               break;
                }
 
                /* write IRQ register */
@@ -715,7 +717,7 @@ static int xdma_irq_init(struct xdma_device *xdev)
                ret = request_irq(irq, xdma_channel_isr, 0,
                                  "xdma-c2h-channel", &xdev->c2h_chans[j]);
                if (ret) {
-                       xdma_err(xdev, "H2C channel%d request irq%d failed: %d",
+                       xdma_err(xdev, "C2H channel%d request irq%d failed: %d",
                                 j, irq, ret);
                        goto failed_init_c2h;
                }
@@ -892,7 +894,7 @@ static int xdma_probe(struct platform_device *pdev)
        }
 
        reg_base = devm_ioremap_resource(&pdev->dev, res);
-       if (!reg_base) {
+       if (IS_ERR(reg_base)) {
                xdma_err(xdev, "ioremap failed");
                goto failed;
        }
index 8b49b0a..533d815 100644 (file)
@@ -291,6 +291,15 @@ static void gpio_sim_mutex_destroy(void *data)
        mutex_destroy(lock);
 }
 
+static void gpio_sim_dispose_mappings(void *data)
+{
+       struct gpio_sim_chip *chip = data;
+       unsigned int i;
+
+       for (i = 0; i < chip->gc.ngpio; i++)
+               irq_dispose_mapping(irq_find_mapping(chip->irq_sim, i));
+}
+
 static void gpio_sim_sysfs_remove(void *data)
 {
        struct gpio_sim_chip *chip = data;
@@ -402,10 +411,14 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev)
        if (!chip->pull_map)
                return -ENOMEM;
 
-       chip->irq_sim = devm_irq_domain_create_sim(dev, NULL, num_lines);
+       chip->irq_sim = devm_irq_domain_create_sim(dev, swnode, num_lines);
        if (IS_ERR(chip->irq_sim))
                return PTR_ERR(chip->irq_sim);
 
+       ret = devm_add_action_or_reset(dev, gpio_sim_dispose_mappings, chip);
+       if (ret)
+               return ret;
+
        mutex_init(&chip->lock);
        ret = devm_add_action_or_reset(dev, gpio_sim_mutex_destroy,
                                       &chip->lock);
@@ -429,6 +442,7 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev)
        gc->set_config = gpio_sim_set_config;
        gc->to_irq = gpio_sim_to_irq;
        gc->free = gpio_sim_free;
+       gc->can_sleep = true;
 
        ret = devm_gpiochip_add_data(dev, gc, chip);
        if (ret)
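
A hedged sketch of the devm_add_action_or_reset() pattern the gpio-sim hunk above uses for gpio_sim_dispose_mappings(): the action runs automatically on driver detach, and immediately if registration itself fails, so no explicit remove-path call is needed. The names below are illustrative only.

#include <linux/device.h>

static void foo_teardown(void *data)
{
        /* undo whatever foo_setup() established, e.g. dispose IRQ mappings */
}

static int foo_setup(struct device *dev, void *state)
{
        /* on error, foo_teardown(state) has already been called */
        return devm_add_action_or_reset(dev, foo_teardown, state);
}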
index e73885a..afb42a8 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
-#define WS16C48_EXTENT 10
+#define WS16C48_EXTENT 11
 #define MAX_NUM_WS16C48 max_num_isa_dev(WS16C48_EXTENT)
 
 static unsigned int base[MAX_NUM_WS16C48];
index 530dfd1..50503a4 100644 (file)
@@ -515,8 +515,9 @@ static ssize_t unexport_store(const struct class *class,
         * they may be undone on its behalf too.
         */
        if (test_and_clear_bit(FLAG_SYSFS, &desc->flags)) {
-               status = 0;
+               gpiod_unexport(desc);
                gpiod_free(desc);
+               status = 0;
        }
 done:
        if (status)
@@ -781,8 +782,10 @@ void gpiochip_sysfs_unregister(struct gpio_device *gdev)
        mutex_unlock(&sysfs_lock);
 
        /* unregister gpiod class devices owned by sysfs */
-       for_each_gpio_desc_with_flag(chip, desc, FLAG_SYSFS)
+       for_each_gpio_desc_with_flag(chip, desc, FLAG_SYSFS) {
+               gpiod_unexport(desc);
                gpiod_free(desc);
+       }
 }
 
 static int __init gpiolib_sysfs_init(void)
index 251c875..76e0c38 100644 (file)
@@ -2167,12 +2167,18 @@ static bool gpiod_free_commit(struct gpio_desc *desc)
 
 void gpiod_free(struct gpio_desc *desc)
 {
-       if (desc && desc->gdev && gpiod_free_commit(desc)) {
-               module_put(desc->gdev->owner);
-               gpio_device_put(desc->gdev);
-       } else {
+       /*
+        * We must not use VALIDATE_DESC_VOID() as the underlying gdev->chip
+        * may already be NULL but we still want to put the references.
+        */
+       if (!desc)
+               return;
+
+       if (!gpiod_free_commit(desc))
                WARN_ON(extra_checks);
-       }
+
+       module_put(desc->gdev->owner);
+       gpio_device_put(desc->gdev);
 }
 
 /**
index a3b86b8..6dc950c 100644 (file)
@@ -1296,6 +1296,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 int amdgpu_device_pci_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
+bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
 bool amdgpu_device_pcie_dynamic_switching_supported(void);
 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
 bool amdgpu_device_aspm_support_quirk(void);
index 040f4cb..fb78a8f 100644 (file)
@@ -295,7 +295,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
 
        if (!p->gang_size) {
                ret = -EINVAL;
-               goto free_partial_kdata;
+               goto free_all_kdata;
        }
 
        for (i = 0; i < p->gang_size; ++i) {
index a2cdde0..6238701 100644 (file)
@@ -1459,6 +1459,32 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
 }
 
 /*
+ * On APUs with >= 64GB of RAM, white flickering has been observed with S/G enabled.
+ * Disable S/G on such systems until we have a proper fix.
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
+ */
+bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
+{
+       switch (amdgpu_sg_display) {
+       case -1:
+               break;
+       case 0:
+               return false;
+       case 1:
+               return true;
+       default:
+               return false;
+       }
+       if ((totalram_pages() << (PAGE_SHIFT - 10)) +
+           (adev->gmc.real_vram_size / 1024) >= 64000000) {
+               DRM_WARN("Disabling S/G due to >=64GB RAM\n");
+               return false;
+       }
+       return true;
+}
+
+/*
  * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
  * speed switching. Until we have confirmation from Intel that a specific host
  * supports it, it's safer that we keep it disabled for all.
@@ -3696,10 +3722,11 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
 {
        if (amdgpu_mcbp == 1)
                adev->gfx.mcbp = true;
-
-       if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
-           (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
-           adev->gfx.num_gfx_rings)
+       else if (amdgpu_mcbp == 0)
+               adev->gfx.mcbp = false;
+       else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
+                (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
+                adev->gfx.num_gfx_rings)
                adev->gfx.mcbp = true;
 
        if (amdgpu_sriov_vf(adev))
@@ -4367,6 +4394,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
                drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
 
        cancel_delayed_work_sync(&adev->delayed_init_work);
+       flush_delayed_work(&adev->gfx.gfx_off_delay_work);
 
        amdgpu_ras_suspend(adev);
 
index c694b41..7537f5a 100644 (file)
@@ -552,6 +552,41 @@ int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
 }
 
 /**
+ * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
+ * fence driver interrupts need to be restored.
+ *
+ * @ring: ring to be checked
+ *
+ * Interrupts for rings that belong to GFX IP don't need to be restored
+ * when the target power state is s0ix.
+ *
+ * Return true if interrupts need to be restored, false otherwise.
+ */
+static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
+{
+       struct amdgpu_device *adev = ring->adev;
+       bool is_gfx_power_domain = false;
+
+       switch (ring->funcs->type) {
+       case AMDGPU_RING_TYPE_SDMA:
+       /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+               if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0))
+                       is_gfx_power_domain = true;
+               break;
+       case AMDGPU_RING_TYPE_GFX:
+       case AMDGPU_RING_TYPE_COMPUTE:
+       case AMDGPU_RING_TYPE_KIQ:
+       case AMDGPU_RING_TYPE_MES:
+               is_gfx_power_domain = true;
+               break;
+       default:
+               break;
+       }
+
+       return !(adev->in_s0ix && is_gfx_power_domain);
+}
+
+/**
  * amdgpu_fence_driver_hw_fini - tear down the fence driver
  * for all possible rings.
  *
@@ -579,7 +614,8 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
                        amdgpu_fence_driver_force_completion(ring);
 
                if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
-                   ring->fence_drv.irq_src)
+                   ring->fence_drv.irq_src &&
+                   amdgpu_fence_need_ring_interrupt_restore(ring))
                        amdgpu_irq_put(adev, ring->fence_drv.irq_src,
                                       ring->fence_drv.irq_type);
 
@@ -655,7 +691,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
                        continue;
 
                /* enable the interrupt */
-               if (ring->fence_drv.irq_src)
+               if (ring->fence_drv.irq_src &&
+                   amdgpu_fence_need_ring_interrupt_restore(ring))
                        amdgpu_irq_get(adev, ring->fence_drv.irq_src,
                                       ring->fence_drv.irq_type);
        }
index a33d4bc..fd81b04 100644 (file)
@@ -692,15 +692,8 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 
                if (adev->gfx.gfx_off_req_count == 0 &&
                    !adev->gfx.gfx_off_state) {
-                       /* If going to s2idle, no need to wait */
-                       if (adev->in_s0ix) {
-                               if (!amdgpu_dpm_set_powergating_by_smu(adev,
-                                               AMD_IP_BLOCK_TYPE_GFX, true))
-                                       adev->gfx.gfx_off_state = true;
-                       } else {
-                               schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+                       schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
                                              delay);
-                       }
                }
        } else {
                if (adev->gfx.gfx_off_req_count == 0) {
index b779ee4..e1ee1c7 100644 (file)
@@ -397,7 +397,7 @@ void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
        struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
 
        WARN_ON(!ring->is_sw_ring);
-       if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+       if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
                if (amdgpu_mcbp_scan(mux) > 0)
                        amdgpu_mcbp_trigger_preempt(mux);
                return;
index 9c9cca1..565a1fa 100644 (file)
@@ -239,8 +239,13 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
 
        for (i = 1; i < MAX_XCP; i++) {
                ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
-               if (ret)
+               if (ret == -ENOSPC) {
+                       dev_warn(adev->dev,
+                       "Skip xcp node #%d when out of drm node resource.", i);
+                       return 0;
+               } else if (ret) {
                        return ret;
+               }
 
                /* Redirect all IOCTLs to the primary device */
                adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
@@ -328,6 +333,9 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
                return 0;
 
        for (i = 1; i < MAX_XCP; i++) {
+               if (!adev->xcp_mgr->xcp[i].ddev)
+                       break;
+
                ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
                if (ret)
                        return ret;
@@ -345,6 +353,9 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
                return;
 
        for (i = 1; i < MAX_XCP; i++) {
+               if (!adev->xcp_mgr->xcp[i].ddev)
+                       break;
+
                p_ddev = adev->xcp_mgr->xcp[i].ddev;
                drm_dev_unplug(p_ddev);
                p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
index 3a7af59..0451533 100644 (file)
@@ -471,8 +471,12 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
        case IP_VERSION(11, 0, 3):
                if ((adev->gfx.me_fw_version >= 1505) &&
                    (adev->gfx.pfp_fw_version >= 1600) &&
-                   (adev->gfx.mec_fw_version >= 512))
-                       adev->gfx.cp_gfx_shadow = true;
+                   (adev->gfx.mec_fw_version >= 512)) {
+                       if (amdgpu_sriov_vf(adev))
+                               adev->gfx.cp_gfx_shadow = true;
+                       else
+                               adev->gfx.cp_gfx_shadow = false;
+               }
                break;
        default:
                adev->gfx.cp_gfx_shadow = false;
index e1a392b..af5685f 100644 (file)
@@ -137,14 +137,15 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
        int ret;
        int retry_loop;
 
+       /* Wait for bootloader to signify that it is ready having bit 31 of
+        * C2PMSG_35 set to 1. All other bits are expected to be cleared.
+        * If there is an error in processing command, bits[7:0] will be set.
+        * This is applicable for PSP v13.0.6 and newer.
+        */
        for (retry_loop = 0; retry_loop < 10; retry_loop++) {
-               /* Wait for bootloader to signify that is
-                   ready having bit 31 of C2PMSG_35 set to 1 */
-               ret = psp_wait_for(psp,
-                                  SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
-                                  0x80000000,
-                                  0x80000000,
-                                  false);
+               ret = psp_wait_for(
+                       psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+                       0x80000000, 0xffffffff, false);
 
                if (ret == 0)
                        return 0;
index 49f40d9..f5a6f56 100644 (file)
@@ -1543,11 +1543,7 @@ static bool kfd_ignore_crat(void)
        if (ignore_crat)
                return true;
 
-#ifndef KFD_SUPPORT_IOMMU_V2
        ret = true;
-#else
-       ret = false;
-#endif
 
        return ret;
 }
index 0b3dc75..a53e075 100644 (file)
@@ -194,11 +194,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
 
                kfd_device_info_set_event_interrupt_class(kfd);
 
-               /* Raven */
-               if (gc_version == IP_VERSION(9, 1, 0) ||
-                   gc_version == IP_VERSION(9, 2, 2))
-                       kfd->device_info.needs_iommu_device = true;
-
                if (gc_version < IP_VERSION(11, 0, 0)) {
                        /* Navi2x+, Navi1x+ */
                        if (gc_version == IP_VERSION(10, 3, 6))
@@ -233,10 +228,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
                    asic_type != CHIP_TONGA)
                        kfd->device_info.supports_cwsr = true;
 
-               if (asic_type == CHIP_KAVERI ||
-                   asic_type == CHIP_CARRIZO)
-                       kfd->device_info.needs_iommu_device = true;
-
                if (asic_type != CHIP_HAWAII && !vf)
                        kfd->device_info.needs_pci_atomics = true;
        }
@@ -249,7 +240,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
        uint32_t gfx_target_version = 0;
 
        switch (adev->asic_type) {
-#ifdef KFD_SUPPORT_IOMMU_V2
 #ifdef CONFIG_DRM_AMDGPU_CIK
        case CHIP_KAVERI:
                gfx_target_version = 70000;
@@ -262,7 +252,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                if (!vf)
                        f2g = &gfx_v8_kfd2kgd;
                break;
-#endif
 #ifdef CONFIG_DRM_AMDGPU_CIK
        case CHIP_HAWAII:
                gfx_target_version = 70001;
@@ -298,7 +287,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                        gfx_target_version = 90000;
                        f2g = &gfx_v9_kfd2kgd;
                        break;
-#ifdef KFD_SUPPORT_IOMMU_V2
                /* Raven */
                case IP_VERSION(9, 1, 0):
                case IP_VERSION(9, 2, 2):
@@ -306,7 +294,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                        if (!vf)
                                f2g = &gfx_v9_kfd2kgd;
                        break;
-#endif
                /* Vega12 */
                case IP_VERSION(9, 2, 1):
                        gfx_target_version = 90004;
index 2df1538..01192f5 100644 (file)
@@ -2538,18 +2538,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
        }
 
        switch (dev->adev->asic_type) {
-       case CHIP_CARRIZO:
-               device_queue_manager_init_vi(&dqm->asic_ops);
-               break;
-
        case CHIP_KAVERI:
-               device_queue_manager_init_cik(&dqm->asic_ops);
-               break;
-
        case CHIP_HAWAII:
                device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
                break;
 
+       case CHIP_CARRIZO:
        case CHIP_TONGA:
        case CHIP_FIJI:
        case CHIP_POLARIS10:
index 61fc62f..4a17bb7 100644 (file)
@@ -1965,7 +1965,14 @@ int kfd_topology_add_device(struct kfd_node *gpu)
        const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
 
        gpu_id = kfd_generate_gpu_id(gpu);
-       pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+       if (gpu->xcp && !gpu->xcp->ddev) {
+               dev_warn(gpu->adev->dev,
+               "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.",
+               gpu_id);
+               return 0;
+       } else {
+               pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+       }
 
        /* Check to see if this gpu device exists in the topology_device_list.
         * If so, assign the gpu to that device,
index 0fa739f..e5554a3 100644 (file)
@@ -1638,9 +1638,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
                }
                break;
        }
-       if (init_data.flags.gpu_vm_support &&
-           (amdgpu_sg_display == 0))
-               init_data.flags.gpu_vm_support = false;
+       if (init_data.flags.gpu_vm_support)
+               init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev);
 
        if (init_data.flags.gpu_vm_support)
                adev->mode_info.gpu_vm_support = true;
index 9bc86de..b885c39 100644 (file)
@@ -1320,7 +1320,7 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
                if (computed_streams[i])
                        continue;
 
-               if (!res_pool->funcs->remove_stream_from_ctx ||
+               if (res_pool->funcs->remove_stream_from_ctx &&
                    res_pool->funcs->remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK)
                        return -EINVAL;
 
index 20d4d08..6966420 100644 (file)
@@ -777,7 +777,8 @@ void dce110_edp_wait_for_hpd_ready(
        dal_gpio_destroy_irq(&hpd);
 
        /* ensure that the panel is detected */
-       ASSERT(edp_hpd_high);
+       if (!edp_hpd_high)
+               DC_LOG_DC("%s: wait timed out!\n", __func__);
 }
 
 void dce110_edp_power_control(
index 4cc8de2..9f2e243 100644 (file)
@@ -712,7 +712,7 @@ static const struct dc_debug_options debug_defaults_drv = {
                .timing_trace = false,
                .clock_trace = true,
                .disable_pplib_clock_request = true,
-               .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+               .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
                .force_single_disp_pipe_split = false,
                .disable_dcc = DCC_ENABLE,
                .vsr_support = true,
index e5b7ef7..50dc834 100644 (file)
@@ -357,8 +357,11 @@ void dpp3_set_cursor_attributes(
        int cur_rom_en = 0;
 
        if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA ||
-               color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA)
-               cur_rom_en = 1;
+               color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) {
+               if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) {
+                       cur_rom_en = 1;
+               }
+       }
 
        REG_UPDATE_3(CURSOR0_CONTROL,
                        CUR0_MODE, color_format,
index ce41a83..222af2f 100644 (file)
@@ -1581,9 +1581,9 @@ static int smu_disable_dpms(struct smu_context *smu)
 
        /*
         * For SMU 13.0.4/11, PMFW will handle the features disablement properly
-        * for gpu reset case. Driver involvement is unnecessary.
+        * for gpu reset and S0i3 cases. Driver involvement is unnecessary.
         */
-       if (amdgpu_in_reset(adev)) {
+       if (amdgpu_in_reset(adev) || adev->in_s0ix) {
                switch (adev->ip_versions[MP1_HWIP][0]) {
                case IP_VERSION(13, 0, 4):
                case IP_VERSION(13, 0, 11):
index 0cda3b2..f0800c0 100644 (file)
@@ -588,7 +588,9 @@ err0_out:
        return -ENOMEM;
 }
 
-static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu)
+static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu,
+                                                          bool use_metrics_v3,
+                                                          bool use_metrics_v2)
 {
        struct smu_table_context *smu_table= &smu->smu_table;
        SmuMetricsExternal_t *metrics_ext =
@@ -596,13 +598,11 @@ static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *s
        uint32_t throttler_status = 0;
        int i;
 
-       if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-            (smu->smc_fw_version >= 0x3A4900)) {
+       if (use_metrics_v3) {
                for (i = 0; i < THROTTLER_COUNT; i++)
                        throttler_status |=
                                (metrics_ext->SmuMetrics_V3.ThrottlingPercentage[i] ? 1U << i : 0);
-       } else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-            (smu->smc_fw_version >= 0x3A4300)) {
+       } else if (use_metrics_v2) {
                for (i = 0; i < THROTTLER_COUNT; i++)
                        throttler_status |=
                                (metrics_ext->SmuMetrics_V2.ThrottlingPercentage[i] ? 1U << i : 0);
@@ -864,7 +864,7 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
                        metrics->TemperatureVrSoc) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
                break;
        case METRICS_THROTTLER_STATUS:
-               *value = sienna_cichlid_get_throttler_status_locked(smu);
+               *value = sienna_cichlid_get_throttler_status_locked(smu, use_metrics_v3, use_metrics_v2);
                break;
        case METRICS_CURR_FANSPEED:
                *value = use_metrics_v3 ? metrics_v3->CurrFanSpeed :
@@ -4017,7 +4017,7 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
        gpu_metrics->current_dclk1 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_1] :
                use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] : metrics->CurrClock[PPCLK_DCLK_1];
 
-       gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu);
+       gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu, use_metrics_v3, use_metrics_v2);
        gpu_metrics->indep_throttle_status =
                        smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status,
                                                           sienna_cichlid_throttler_map);
index 3d18861..0fb6be1 100644 (file)
@@ -332,10 +332,13 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
                table_context->power_play_table;
        struct smu_baco_context *smu_baco = &smu->smu_baco;
        PPTable_t *pptable = smu->smu_table.driver_pptable;
+#if 0
+       PPTable_t *pptable = smu->smu_table.driver_pptable;
        const OverDriveLimits_t * const overdrive_upperlimits =
                                &pptable->SkuTable.OverDriveLimitsBasicMax;
        const OverDriveLimits_t * const overdrive_lowerlimits =
                                &pptable->SkuTable.OverDriveLimitsMin;
+#endif
 
        if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC)
                smu->dc_controlled_by_gpio = true;
@@ -347,18 +350,30 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
        if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO)
                smu_baco->maco_support = true;
 
+       /*
+        * We are in the transition to a new OD mechanism.
+        * Disable the OD feature support for SMU13 temporarily.
+        * TODO: get this reverted when the new OD mechanism is online
+        */
+#if 0
        if (!overdrive_lowerlimits->FeatureCtrlMask ||
            !overdrive_upperlimits->FeatureCtrlMask)
                smu->od_enabled = false;
 
-       table_context->thermal_controller_type =
-               powerplay_table->thermal_controller_type;
-
        /*
         * Instead of having its own buffer space and get overdrive_table copied,
         * smu->od_settings just points to the actual overdrive_table
         */
        smu->od_settings = &powerplay_table->overdrive_table;
+#else
+       smu->od_enabled = false;
+#endif
+
+       table_context->thermal_controller_type =
+               powerplay_table->thermal_controller_type;
+
+       smu->adev->pm.no_fan =
+               !(pptable->SkuTable.FeaturesToRun[0] & (1 << FEATURE_FAN_CONTROL_BIT));
 
        return 0;
 }
@@ -1140,7 +1155,6 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
                (OverDriveTableExternal_t *)smu->smu_table.overdrive_table;
        struct smu_13_0_dpm_table *single_dpm_table;
        struct smu_13_0_pcie_table *pcie_table;
-       const int link_width[] = {0, 1, 2, 4, 8, 12, 16};
        uint32_t gen_speed, lane_width;
        int i, curr_freq, size = 0;
        int32_t min_value, max_value;
@@ -1256,7 +1270,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
                                        (pcie_table->pcie_lane[i] == 6) ? "x16" : "",
                                        pcie_table->clk_freq[i],
                                        (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
-                                       (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ?
+                                       (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ?
                                        "*" : "");
                break;
 
index 1ac5521..dc6104a 100644 (file)
 #define EPSILON 1
 
 #define smnPCIE_ESM_CTRL 0x193D0
-#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1ab40288
+#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1a340288
 #define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK 0x00000070L
 #define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT 0x4
+#define MAX_LINK_WIDTH 6
 
 static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
        MSG_MAP(TestMessage,                         PPSMC_MSG_TestMessage,                     0),
@@ -708,16 +709,19 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
                *value = SMUQ10_TO_UINT(metrics->SocketPower) << 8;
                break;
        case METRICS_TEMPERATURE_HOTSPOT:
-               *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature);
+               *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature) *
+                        SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
                break;
        case METRICS_TEMPERATURE_MEM:
-               *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature);
+               *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature) *
+                        SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
                break;
        /* This is the max of all VRs and not just SOC VR.
         * No need to define another data type for the same.
         */
        case METRICS_TEMPERATURE_VRSOC:
-               *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature);
+               *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature) *
+                        SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
                break;
        default:
                *value = UINT_MAX;
@@ -1966,6 +1970,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
        struct amdgpu_device *adev = smu->adev;
        int ret = 0, inst0, xcc0;
        MetricsTable_t *metrics;
+       u16 link_width_level;
 
        inst0 = adev->sdma.instance[0].aid_id;
        xcc0 = GET_INST(GC, 0);
@@ -1993,9 +1998,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 
        gpu_metrics->average_socket_power =
                SMUQ10_TO_UINT(metrics->SocketPower);
-       /* Energy is reported in 15.625mJ units */
-       gpu_metrics->energy_accumulator =
-               SMUQ10_TO_UINT(metrics->SocketEnergyAcc);
+       /* Energy counter reported in 15.259uJ (2^-16) units */
+       gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
 
        gpu_metrics->current_gfxclk =
                SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc0]);
@@ -2017,8 +2021,12 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
        gpu_metrics->throttle_status = 0;
 
        if (!(adev->flags & AMD_IS_APU)) {
+               link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu);
+               if (link_width_level > MAX_LINK_WIDTH)
+                       link_width_level = 0;
+
                gpu_metrics->pcie_link_width =
-                       smu_v13_0_6_get_current_pcie_link_width_level(smu);
+                       DECODE_LANE_WIDTH(link_width_level);
                gpu_metrics->pcie_link_speed =
                        smu_v13_0_6_get_current_pcie_link_speed(smu);
        }
index b1f0937..62f2886 100644 (file)
@@ -323,10 +323,12 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
        struct smu_baco_context *smu_baco = &smu->smu_baco;
        PPTable_t *smc_pptable = table_context->driver_pptable;
        BoardTable_t *BoardTable = &smc_pptable->BoardTable;
+#if 0
        const OverDriveLimits_t * const overdrive_upperlimits =
                                &smc_pptable->SkuTable.OverDriveLimitsBasicMax;
        const OverDriveLimits_t * const overdrive_lowerlimits =
                                &smc_pptable->SkuTable.OverDriveLimitsMin;
+#endif
 
        if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC)
                smu->dc_controlled_by_gpio = true;
@@ -338,18 +340,22 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
        if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled))
                smu_baco->maco_support = true;
 
+#if 0
        if (!overdrive_lowerlimits->FeatureCtrlMask ||
            !overdrive_upperlimits->FeatureCtrlMask)
                smu->od_enabled = false;
 
-       table_context->thermal_controller_type =
-               powerplay_table->thermal_controller_type;
-
        /*
         * Instead of having its own buffer space and get overdrive_table copied,
         * smu->od_settings just points to the actual overdrive_table
         */
        smu->od_settings = &powerplay_table->overdrive_table;
+#else
+       smu->od_enabled = false;
+#endif
+
+       table_context->thermal_controller_type =
+               powerplay_table->thermal_controller_type;
 
        return 0;
 }
index 504d51c..aadb396 100644 (file)
@@ -2517,9 +2517,11 @@ static irqreturn_t it6505_int_threaded_handler(int unused, void *data)
        };
        int int_status[3], i;
 
-       if (it6505->enable_drv_hold || pm_runtime_get_if_in_use(dev) <= 0)
+       if (it6505->enable_drv_hold || !it6505->powered)
                return IRQ_HANDLED;
 
+       pm_runtime_get_sync(dev);
+
        int_status[0] = it6505_read(it6505, INT_STATUS_01);
        int_status[1] = it6505_read(it6505, INT_STATUS_02);
        int_status[2] = it6505_read(it6505, INT_STATUS_03);
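
A hedged sketch of the balanced runtime-PM reference pattern the it6505 hunk above switches to (the matching put in that handler sits outside the lines shown, and the pl330 code at the top of this diff follows the same shape). The handler name is illustrative only.

#include <linux/pm_runtime.h>

static void foo_handle_event(struct device *dev)
{
        pm_runtime_get_sync(dev);       /* device is powered from here on */

        /* ... read interrupt status, service the event ... */

        pm_runtime_mark_last_busy(dev);
        pm_runtime_put_autosuspend(dev);
}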
index 5163e52..9663601 100644 (file)
@@ -774,9 +774,7 @@ static struct mipi_dsi_device *lt9611_attach_dsi(struct lt9611 *lt9611,
        dsi->lanes = 4;
        dsi->format = MIPI_DSI_FMT_RGB888;
        dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
-                         MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO_NO_HSA |
-                         MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP |
-                         MIPI_DSI_MODE_NO_EOT_PACKET;
+                         MIPI_DSI_MODE_VIDEO_HSE;
 
        ret = devm_mipi_dsi_attach(dev, dsi);
        if (ret < 0) {
index 043b810..73ec607 100644 (file)
@@ -1386,6 +1386,18 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi)
        disable_irq(dsi->irq);
 }
 
+static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable)
+{
+       u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
+
+       if (enable)
+               reg |= DSIM_FORCE_STOP_STATE;
+       else
+               reg &= ~DSIM_FORCE_STOP_STATE;
+
+       samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
+}
+
 static int samsung_dsim_init(struct samsung_dsim *dsi)
 {
        const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
@@ -1445,15 +1457,12 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge,
                                       struct drm_bridge_state *old_bridge_state)
 {
        struct samsung_dsim *dsi = bridge_to_dsi(bridge);
-       u32 reg;
 
        if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
                samsung_dsim_set_display_mode(dsi);
                samsung_dsim_set_display_enable(dsi, true);
        } else {
-               reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
-               reg &= ~DSIM_FORCE_STOP_STATE;
-               samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
+               samsung_dsim_set_stop_state(dsi, false);
        }
 
        dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE;
@@ -1463,16 +1472,12 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge,
                                        struct drm_bridge_state *old_bridge_state)
 {
        struct samsung_dsim *dsi = bridge_to_dsi(bridge);
-       u32 reg;
 
        if (!(dsi->state & DSIM_STATE_ENABLED))
                return;
 
-       if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
-               reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
-               reg |= DSIM_FORCE_STOP_STATE;
-               samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
-       }
+       if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type))
+               samsung_dsim_set_stop_state(dsi, true);
 
        dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE;
 }
@@ -1775,6 +1780,8 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host,
        if (ret)
                return ret;
 
+       samsung_dsim_set_stop_state(dsi, false);
+
        ret = mipi_dsi_create_packet(&xfer.packet, msg);
        if (ret < 0)
                return ret;
index e0dbd91..1f47096 100644 (file)
@@ -3456,6 +3456,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto
                            connector->base.id, connector->name);
                return NULL;
        }
+       if (!(pt->misc & DRM_EDID_PT_SEPARATE_SYNC)) {
+               drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Composite sync not supported\n",
+                           connector->base.id, connector->name);
+       }
 
        /* it is incorrect if hsync/vsync width is zero */
        if (!hsync_pulse_width || !vsync_pulse_width) {
@@ -3502,27 +3506,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto
        if (info->quirks & EDID_QUIRK_DETAILED_SYNC_PP) {
                mode->flags |= DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC;
        } else {
-               switch (pt->misc & DRM_EDID_PT_SYNC_MASK) {
-               case DRM_EDID_PT_ANALOG_CSYNC:
-               case DRM_EDID_PT_BIPOLAR_ANALOG_CSYNC:
-                       drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Analog composite sync!\n",
-                                   connector->base.id, connector->name);
-                       mode->flags |= DRM_MODE_FLAG_CSYNC | DRM_MODE_FLAG_NCSYNC;
-                       break;
-               case DRM_EDID_PT_DIGITAL_CSYNC:
-                       drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Digital composite sync!\n",
-                                   connector->base.id, connector->name);
-                       mode->flags |= DRM_MODE_FLAG_CSYNC;
-                       mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
-                               DRM_MODE_FLAG_PCSYNC : DRM_MODE_FLAG_NCSYNC;
-                       break;
-               case DRM_EDID_PT_DIGITAL_SEPARATE_SYNC:
-                       mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
-                               DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
-                       mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ?
-                               DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
-                       break;
-               }
+               mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
+                       DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
+               mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ?
+                       DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
        }
 
 set_size:
index 4ea6507..baaf0e0 100644 (file)
@@ -623,7 +623,13 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
        int ret;
 
        if (obj->import_attach) {
+               /* Reset both vm_ops and vm_private_data, so we don't end up with
+                * vm_ops pointing to our implementation if the dma-buf backend
+                * doesn't set those fields.
+                */
                vma->vm_private_data = NULL;
+               vma->vm_ops = NULL;
+
                ret = dma_buf_mmap(obj->dma_buf, vma, 0);
 
                /* Drop the reference drm_gem_mmap_obj() acquired.*/
index 2fb9bf9..3f47948 100644 (file)
@@ -262,6 +262,26 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
 }
 
 #define DRM_OUTPUT_POLL_PERIOD (10*HZ)
+static void reschedule_output_poll_work(struct drm_device *dev)
+{
+       unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
+
+       if (dev->mode_config.delayed_event)
+               /*
+                * FIXME:
+                *
+                * Use short (1s) delay to handle the initial delayed event.
+                * This delay should not be needed, but Optimus/nouveau will
+                * fail in a mysterious way if the delayed event is handled as
+                * soon as possible like it is done in
+                * drm_helper_probe_single_connector_modes() in case the poll
+                * was enabled before.
+                */
+               delay = HZ;
+
+       schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
+}
+
 /**
  * drm_kms_helper_poll_enable - re-enable output polling.
  * @dev: drm_device
@@ -279,37 +299,41 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
  */
 void drm_kms_helper_poll_enable(struct drm_device *dev)
 {
-       bool poll = false;
-       unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
-
        if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll ||
            dev->mode_config.poll_running)
                return;
 
-       poll = drm_kms_helper_enable_hpd(dev);
-
-       if (dev->mode_config.delayed_event) {
-               /*
-                * FIXME:
-                *
-                * Use short (1s) delay to handle the initial delayed event.
-                * This delay should not be needed, but Optimus/nouveau will
-                * fail in a mysterious way if the delayed event is handled as
-                * soon as possible like it is done in
-                * drm_helper_probe_single_connector_modes() in case the poll
-                * was enabled before.
-                */
-               poll = true;
-               delay = HZ;
-       }
-
-       if (poll)
-               schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
+       if (drm_kms_helper_enable_hpd(dev) ||
+           dev->mode_config.delayed_event)
+               reschedule_output_poll_work(dev);
 
        dev->mode_config.poll_running = true;
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_enable);
 
+/**
+ * drm_kms_helper_poll_reschedule - reschedule the output polling work
+ * @dev: drm_device
+ *
+ * This function reschedules the output polling work, after polling for a
+ * connector has been enabled.
+ *
+ * Drivers must call this helper after enabling polling for a connector by
+ * setting %DRM_CONNECTOR_POLL_CONNECT / %DRM_CONNECTOR_POLL_DISCONNECT flags
+ * in drm_connector::polled. Note that disabling polling for a connector by
+ * clearing these flags will stop the output polling work automatically once
+ * polling is disabled for all other connectors as well.
+ *
+ * The function can be called only after polling has been enabled by calling
+ * drm_kms_helper_poll_init() / drm_kms_helper_poll_enable().
+ */
+void drm_kms_helper_poll_reschedule(struct drm_device *dev)
+{
+       if (dev->mode_config.poll_running)
+               reschedule_output_poll_work(dev);
+}
+EXPORT_SYMBOL(drm_kms_helper_poll_reschedule);
+
 static enum drm_connector_status
 drm_helper_probe_detect_ctx(struct drm_connector *connector, bool force)
 {
index f0ee9bc..b0c6a2a 100644 (file)
@@ -662,10 +662,24 @@ static const struct intel_display_device_info xe_lpdp_display = {
                BIT(TRANSCODER_C) | BIT(TRANSCODER_D),
 };
 
+/*
+ * Separate detection for no display cases to keep the display id array simple.
+ *
+ * IVB Q requires subvendor and subdevice matching to differentiate from IVB D
+ * GT2 server.
+ */
+static bool has_no_display(struct pci_dev *pdev)
+{
+       static const struct pci_device_id ids[] = {
+               INTEL_IVB_Q_IDS(0),
+               {}
+       };
+
+       return pci_match_id(ids, pdev);
+}
+
 #undef INTEL_VGA_DEVICE
-#undef INTEL_QUANTA_VGA_DEVICE
 #define INTEL_VGA_DEVICE(id, info) { id, info }
-#define INTEL_QUANTA_VGA_DEVICE(info) { 0x16a, info }
 
 static const struct {
        u32 devid;
@@ -690,7 +704,6 @@ static const struct {
        INTEL_IRONLAKE_M_IDS(&ilk_m_display),
        INTEL_SNB_D_IDS(&snb_display),
        INTEL_SNB_M_IDS(&snb_display),
-       INTEL_IVB_Q_IDS(NULL),          /* must be first IVB in list */
        INTEL_IVB_M_IDS(&ivb_display),
        INTEL_IVB_D_IDS(&ivb_display),
        INTEL_HSW_IDS(&hsw_display),
@@ -775,6 +788,11 @@ intel_display_device_probe(struct drm_i915_private *i915, bool has_gmdid,
        if (has_gmdid)
                return probe_gmdid_display(i915, gmdid_ver, gmdid_rel, gmdid_step);
 
+       if (has_no_display(pdev)) {
+               drm_dbg_kms(&i915->drm, "Device doesn't have display\n");
+               return &no_display;
+       }
+
        for (i = 0; i < ARRAY_SIZE(intel_display_ids); i++) {
                if (intel_display_ids[i].devid == pdev->device)
                        return intel_display_ids[i].info;
index 1160fa2..5eac703 100644 (file)
@@ -211,7 +211,7 @@ intel_hpd_irq_storm_switch_to_polling(struct drm_i915_private *dev_priv)
 
        /* Enable polling and queue hotplug re-enabling. */
        if (hpd_disabled) {
-               drm_kms_helper_poll_enable(&dev_priv->drm);
+               drm_kms_helper_poll_reschedule(&dev_priv->drm);
                mod_delayed_work(dev_priv->unordered_wq,
                                 &dev_priv->display.hotplug.reenable_work,
                                 msecs_to_jiffies(HPD_STORM_REENABLE_DELAY));
@@ -649,7 +649,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work)
        drm_connector_list_iter_end(&conn_iter);
 
        if (enabled)
-               drm_kms_helper_poll_enable(&dev_priv->drm);
+               drm_kms_helper_poll_reschedule(&dev_priv->drm);
 
        mutex_unlock(&dev_priv->drm.mode_config.mutex);
 
index 21f9212..67e3aaf 100644 (file)
@@ -2752,7 +2752,7 @@ static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void)
        __drm_atomic_helper_connector_reset(&sdvo_connector->base.base,
                                            &conn_state->base.base);
 
-       INIT_LIST_HEAD(&sdvo_connector->base.panel.fixed_modes);
+       intel_panel_init_alloc(&sdvo_connector->base);
 
        return sdvo_connector;
 }
index ee9f83a..477df26 100644 (file)
@@ -470,12 +470,19 @@ int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val)
        ret = slpc_set_param(slpc,
                             SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
                             val);
-       if (ret)
+       if (ret) {
                guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n",
                                val, ERR_PTR(ret));
-       else
+       } else {
                slpc->ignore_eff_freq = val;
 
+               /* Set min to RPn when we disable efficient freq */
+               if (val)
+                       ret = slpc_set_param(slpc,
+                                            SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+                                            slpc->min_freq);
+       }
+
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&slpc->lock);
        return ret;
@@ -602,9 +609,8 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
                return ret;
 
        if (!slpc->min_freq_softlimit) {
-               ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit);
-               if (unlikely(ret))
-                       return ret;
+               /* Min softlimit is initialized to RPn */
+               slpc->min_freq_softlimit = slpc->min_freq;
                slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit;
        } else {
                return intel_guc_slpc_set_min_freq(slpc,
@@ -755,6 +761,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
                return ret;
        }
 
+       /* Set cached value of ignore efficient freq */
+       intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
+
        /* Revert SLPC min/max to softlimits if necessary */
        ret = slpc_set_softlimits(slpc);
        if (unlikely(ret)) {
@@ -765,9 +774,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
        /* Set cached media freq ratio mode */
        intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
 
-       /* Set cached value of ignore efficient freq */
-       intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
-
        return 0;
 }
 
index ddd1462..fa70def 100644 (file)
@@ -26,6 +26,7 @@
  * The kernel driver is only responsible for loading the HuC firmware and
  * triggering its security authentication. This is done differently depending
  * on the platform:
+ *
  * - older platforms (from Gen9 to most Gen12s): the load is performed via DMA
  *   and the authentication via GuC
  * - DG2: load and authentication are both performed via GSC.
@@ -33,6 +34,7 @@
  *   not-DG2 older platforms), while the authentication is done in 2-steps,
  *   a first auth for clear-media workloads via GuC and a second one for all
  *   workloads via GSC.
+ *
  * On platforms where the GuC does the authentication, to correctly do so the
  * HuC binary must be loaded before the GuC one.
  * Loading the HuC is optional; however, not using the HuC might negatively
index 0ad0c58..7d8671f 100644 (file)
@@ -443,7 +443,6 @@ static int i915_pcode_init(struct drm_i915_private *i915)
 static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 {
        struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-       struct pci_dev *root_pdev;
        int ret;
 
        if (i915_inject_probe_failure(dev_priv))
@@ -557,15 +556,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 
        intel_bw_init_hw(dev_priv);
 
-       /*
-        * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
-        * This should be totally removed when we handle the pci states properly
-        * on runtime PM and on s2idle cases.
-        */
-       root_pdev = pcie_find_root_port(pdev);
-       if (root_pdev)
-               pci_d3cold_disable(root_pdev);
-
        return 0;
 
 err_opregion:
@@ -591,7 +581,6 @@ err_perf:
 static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
 {
        struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-       struct pci_dev *root_pdev;
 
        i915_perf_fini(dev_priv);
 
@@ -599,10 +588,6 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
 
        if (pdev->msi_enabled)
                pci_disable_msi(pdev);
-
-       root_pdev = pcie_find_root_port(pdev);
-       if (root_pdev)
-               pci_d3cold_enable(root_pdev);
 }
 
 /**
@@ -1517,6 +1502,8 @@ static int intel_runtime_suspend(struct device *kdev)
 {
        struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
        struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+       struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+       struct pci_dev *root_pdev;
        struct intel_gt *gt;
        int ret, i;
 
@@ -1568,6 +1555,15 @@ static int intel_runtime_suspend(struct device *kdev)
                drm_err(&dev_priv->drm,
                        "Unclaimed access detected prior to suspending\n");
 
+       /*
+        * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
+        * This should be totally removed when we handle the pci states properly
+        * on runtime PM.
+        */
+       root_pdev = pcie_find_root_port(pdev);
+       if (root_pdev)
+               pci_d3cold_disable(root_pdev);
+
        rpm->suspended = true;
 
        /*
@@ -1606,6 +1602,8 @@ static int intel_runtime_resume(struct device *kdev)
 {
        struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
        struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+       struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+       struct pci_dev *root_pdev;
        struct intel_gt *gt;
        int ret, i;
 
@@ -1619,6 +1617,11 @@ static int intel_runtime_resume(struct device *kdev)
 
        intel_opregion_notify_adapter(dev_priv, PCI_D0);
        rpm->suspended = false;
+
+       root_pdev = pcie_find_root_port(pdev);
+       if (root_pdev)
+               pci_d3cold_enable(root_pdev);
+
        if (intel_uncore_unclaimed_mmio(&dev_priv->uncore))
                drm_dbg(&dev_priv->drm,
                        "Unclaimed access during suspend, bios?\n");
index f75c6f0..622f6eb 100644 (file)
@@ -967,7 +967,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
        /* Determine display colour depth for everything except LVDS now,
         * DP requires this before mode_valid() is called.
         */
-       if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode)
+       if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
                nouveau_connector_detect_depth(connector);
 
        /* Find the native mode if this is a digital panel, if we didn't
@@ -1408,8 +1408,7 @@ nouveau_connector_create(struct drm_device *dev,
                ret = nvif_conn_ctor(&disp->disp, nv_connector->base.name, nv_connector->index,
                                     &nv_connector->conn);
                if (ret) {
-                       kfree(nv_connector);
-                       return ERR_PTR(ret);
+                       goto drm_conn_err;
                }
 
                ret = nvif_conn_event_ctor(&nv_connector->conn, "kmsHotplug",
@@ -1426,8 +1425,7 @@ nouveau_connector_create(struct drm_device *dev,
                        if (ret) {
                                nvif_event_dtor(&nv_connector->hpd);
                                nvif_conn_dtor(&nv_connector->conn);
-                               kfree(nv_connector);
-                               return ERR_PTR(ret);
+                               goto drm_conn_err;
                        }
                }
        }
@@ -1475,4 +1473,9 @@ nouveau_connector_create(struct drm_device *dev,
 
        drm_connector_register(connector);
        return connector;
+
+drm_conn_err:
+       drm_connector_cleanup(connector);
+       kfree(nv_connector);
+       return ERR_PTR(ret);
 }
index 40c8ea4..b8ac66b 100644 (file)
@@ -26,6 +26,8 @@
 #include "head.h"
 #include "ior.h"
 
+#include <drm/display/drm_dp.h>
+
 #include <subdev/bios.h>
 #include <subdev/bios/init.h>
 #include <subdev/gpio.h>
@@ -634,6 +636,50 @@ nvkm_dp_enable_supported_link_rates(struct nvkm_outp *outp)
        return outp->dp.rates != 0;
 }
 
+/* XXX: This is a big fat hack, and this is just drm_dp_read_dpcd_caps()
+ * converted to work inside nvkm. This is a temporary holdover until we start
+ * passing the drm_dp_aux device through NVKM
+ */
+static int
+nvkm_dp_read_dpcd_caps(struct nvkm_outp *outp)
+{
+       struct nvkm_i2c_aux *aux = outp->dp.aux;
+       u8 dpcd_ext[DP_RECEIVER_CAP_SIZE];
+       int ret;
+
+       ret = nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, DP_RECEIVER_CAP_SIZE);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * Prior to DP1.3 the bit represented by
+        * DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT was reserved.
+        * If it is set DP_DPCD_REV at 0000h could be at a value less than
+        * the true capability of the panel. The only way to check is to
+        * then compare 0000h and 2200h.
+        */
+       if (!(outp->dp.dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+             DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT))
+               return 0;
+
+       ret = nvkm_rdaux(aux, DP_DP13_DPCD_REV, dpcd_ext, sizeof(dpcd_ext));
+       if (ret < 0)
+               return ret;
+
+       if (outp->dp.dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) {
+               OUTP_DBG(outp, "Extended DPCD rev less than base DPCD rev (%d > %d)\n",
+                        outp->dp.dpcd[DP_DPCD_REV], dpcd_ext[DP_DPCD_REV]);
+               return 0;
+       }
+
+       if (!memcmp(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext)))
+               return 0;
+
+       memcpy(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext));
+
+       return 0;
+}
+
 void
 nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
 {
@@ -689,7 +735,7 @@ nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
                        memset(outp->dp.lttpr, 0x00, sizeof(outp->dp.lttpr));
                }
 
-               if (!nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, sizeof(outp->dp.dpcd))) {
+               if (!nvkm_dp_read_dpcd_caps(outp)) {
                        const u8 rates[] = { 0x1e, 0x14, 0x0a, 0x06, 0 };
                        const u8 *rate;
                        int rate_max;
index 00dbeda..de161e7 100644 (file)
@@ -117,6 +117,7 @@ void gk104_grctx_generate_r418800(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110_grctx;
 void gk110_grctx_generate_r419eb0(struct gf100_gr *);
+void gk110_grctx_generate_r419f78(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110b_grctx;
 extern const struct gf100_grctx_func gk208_grctx;
index 94233d0..52a234b 100644 (file)
@@ -906,7 +906,9 @@ static void
 gk104_grctx_generate_r419f78(struct gf100_gr *gr)
 {
        struct nvkm_device *device = gr->base.engine.subdev.device;
-       nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
+
+       /* bit 3 set disables loads in fp helper invocations, we need it enabled */
+       nvkm_mask(device, 0x419f78, 0x00000009, 0x00000000);
 }
 
 void
index 4391458..3acdd9e 100644 (file)
@@ -820,6 +820,15 @@ gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
        nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
 }
 
+void
+gk110_grctx_generate_r419f78(struct gf100_gr *gr)
+{
+       struct nvkm_device *device = gr->base.engine.subdev.device;
+
+       /* bit 3 set disables loads in fp helper invocations, we need it enabled */
+       nvkm_mask(device, 0x419f78, 0x00000008, 0x00000000);
+}
+
 const struct gf100_grctx_func
 gk110_grctx = {
        .main  = gf100_grctx_generate_main,
@@ -854,4 +863,5 @@ gk110_grctx = {
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r418800 = gk104_grctx_generate_r418800,
        .r419eb0 = gk110_grctx_generate_r419eb0,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index 7b9a34f..5597e87 100644 (file)
@@ -103,4 +103,5 @@ gk110b_grctx = {
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r418800 = gk104_grctx_generate_r418800,
        .r419eb0 = gk110_grctx_generate_r419eb0,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index c78d07a..6126564 100644 (file)
@@ -568,4 +568,5 @@ gk208_grctx = {
        .dist_skip_table = gf117_grctx_generate_dist_skip_table,
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r418800 = gk104_grctx_generate_r418800,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index beac66e..9906974 100644 (file)
@@ -988,4 +988,5 @@ gm107_grctx = {
        .r406500 = gm107_grctx_generate_r406500,
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r419e00 = gm107_grctx_generate_r419e00,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index 3b6c810..a7775aa 100644 (file)
@@ -206,19 +206,6 @@ tu102_gr_av_to_init_veid(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
        return gk20a_gr_av_to_init_(blob, 64, 0x00100000, ppack);
 }
 
-int
-tu102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
-{
-       int ret;
-
-       ret = gm200_gr_load(gr, ver, fwif);
-       if (ret)
-               return ret;
-
-       return gk20a_gr_load_net(gr, "gr/", "sw_veid_bundle_init", ver, tu102_gr_av_to_init_veid,
-                                &gr->bundle_veid);
-}
-
 static const struct gf100_gr_fwif
 tu102_gr_fwif[] = {
        {  0, gm200_gr_load, &tu102_gr, &gp108_gr_fecs_acr, &gp108_gr_gpccs_acr },
index 8f4f137..2130084 100644 (file)
@@ -404,38 +404,30 @@ static int jdi_panel_add(struct jdi_panel *jdi)
 
        ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(jdi->supplies),
                                      jdi->supplies);
-       if (ret < 0) {
-               dev_err(dev, "failed to init regulator, ret=%d\n", ret);
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(dev, ret,
+                                    "failed to init regulator, ret=%d\n", ret);
 
        jdi->enable_gpio = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
        if (IS_ERR(jdi->enable_gpio)) {
-               ret = PTR_ERR(jdi->enable_gpio);
-               dev_err(dev, "cannot get enable-gpio %d\n", ret);
-               return ret;
+               return dev_err_probe(dev, PTR_ERR(jdi->enable_gpio),
+                                    "cannot get enable-gpio %d\n", ret);
        }
 
        jdi->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
-       if (IS_ERR(jdi->reset_gpio)) {
-               ret = PTR_ERR(jdi->reset_gpio);
-               dev_err(dev, "cannot get reset-gpios %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(jdi->reset_gpio))
+               return dev_err_probe(dev, PTR_ERR(jdi->reset_gpio),
+                                    "cannot get reset-gpios %d\n", ret);
 
        jdi->dcdc_en_gpio = devm_gpiod_get(dev, "dcdc-en", GPIOD_OUT_LOW);
-       if (IS_ERR(jdi->dcdc_en_gpio)) {
-               ret = PTR_ERR(jdi->dcdc_en_gpio);
-               dev_err(dev, "cannot get dcdc-en-gpio %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(jdi->dcdc_en_gpio))
+               return dev_err_probe(dev, PTR_ERR(jdi->dcdc_en_gpio),
+                                    "cannot get dcdc-en-gpio %d\n", ret);
 
        jdi->backlight = drm_panel_create_dsi_backlight(jdi->dsi);
-       if (IS_ERR(jdi->backlight)) {
-               ret = PTR_ERR(jdi->backlight);
-               dev_err(dev, "failed to register backlight %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(jdi->backlight))
+               return dev_err_probe(dev, PTR_ERR(jdi->backlight),
+                                    "failed to register backlight %d\n", ret);
 
        drm_panel_init(&jdi->base, &jdi->dsi->dev, &jdi_panel_funcs,
                       DRM_MODE_CONNECTOR_DSI);
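A minimal sketch (not part of this diff) of the dev_err_probe() pattern the conversion above relies on: the helper logs the message (quietly recording the deferral reason instead for -EPROBE_DEFER) and returns the error code, so the separate dev_err() and return statements collapse into one. demo_get_reset() is a hypothetical name.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/gpio/consumer.h>

/* Hypothetical helper, for illustration only. */
static int demo_get_reset(struct device *dev, struct gpio_desc **out)
{
	struct gpio_desc *gpiod;

	gpiod = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
	if (IS_ERR(gpiod))
		return dev_err_probe(dev, PTR_ERR(gpiod),
				     "cannot get reset-gpios\n");

	*out = gpiod;
	return 0;
}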
index aaba36b..b38d0e9 100644 (file)
@@ -999,21 +999,21 @@ static const struct panel_desc auo_g104sn02 = {
        .connector_type = DRM_MODE_CONNECTOR_LVDS,
 };
 
-static const struct drm_display_mode auo_g121ean01_mode = {
-       .clock = 66700,
-       .hdisplay = 1280,
-       .hsync_start = 1280 + 58,
-       .hsync_end = 1280 + 58 + 8,
-       .htotal = 1280 + 58 + 8 + 70,
-       .vdisplay = 800,
-       .vsync_start = 800 + 6,
-       .vsync_end = 800 + 6 + 4,
-       .vtotal = 800 + 6 + 4 + 10,
+static const struct display_timing auo_g121ean01_timing = {
+       .pixelclock = { 60000000, 74400000, 90000000 },
+       .hactive = { 1280, 1280, 1280 },
+       .hfront_porch = { 20, 50, 100 },
+       .hback_porch = { 20, 50, 100 },
+       .hsync_len = { 30, 100, 200 },
+       .vactive = { 800, 800, 800 },
+       .vfront_porch = { 2, 10, 25 },
+       .vback_porch = { 2, 10, 25 },
+       .vsync_len = { 4, 18, 50 },
 };
 
 static const struct panel_desc auo_g121ean01 = {
-       .modes = &auo_g121ean01_mode,
-       .num_modes = 1,
+       .timings = &auo_g121ean01_timing,
+       .num_timings = 1,
        .bpc = 8,
        .size = {
                .width = 261,
index 58dfb15..e78de99 100644 (file)
@@ -96,7 +96,7 @@ static int panfrost_read_speedbin(struct device *dev)
                 * keep going without it; any other error means that we are
                 * supposed to read the bin value, but we failed doing so.
                 */
-               if (ret != -ENOENT) {
+               if (ret != -ENOENT && ret != -EOPNOTSUPP) {
                        DRM_DEV_ERROR(dev, "Cannot read speed-bin (%d).", ret);
                        return ret;
                }
index ea993d7..307a890 100644 (file)
@@ -310,7 +310,7 @@ int qxl_gem_object_create_with_handle(struct qxl_device *qdev,
                                      u32 domain,
                                      size_t size,
                                      struct qxl_surface *surf,
-                                     struct qxl_bo **qobj,
+                                     struct drm_gem_object **gobj,
                                      uint32_t *handle);
 void qxl_gem_object_free(struct drm_gem_object *gobj);
 int qxl_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv);
index d636ba6..17df5c7 100644 (file)
@@ -34,6 +34,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
 {
        struct qxl_device *qdev = to_qxl(dev);
        struct qxl_bo *qobj;
+       struct drm_gem_object *gobj;
        uint32_t handle;
        int r;
        struct qxl_surface surf;
@@ -62,11 +63,13 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
 
        r = qxl_gem_object_create_with_handle(qdev, file_priv,
                                              QXL_GEM_DOMAIN_CPU,
-                                             args->size, &surf, &qobj,
+                                             args->size, &surf, &gobj,
                                              &handle);
        if (r)
                return r;
+       qobj = gem_to_qxl_bo(gobj);
        qobj->is_dumb = true;
+       drm_gem_object_put(gobj);
        args->pitch = pitch;
        args->handle = handle;
        return 0;
index a08da0b..fc5e376 100644 (file)
@@ -72,32 +72,41 @@ int qxl_gem_object_create(struct qxl_device *qdev, int size,
        return 0;
 }
 
+/*
+ * If the caller passed a valid gobj pointer, it is responsible for calling
+ * drm_gem_object_put() when it no longer needs to access the object.
+ *
+ * If gobj is NULL, it is handled internally.
+ */
 int qxl_gem_object_create_with_handle(struct qxl_device *qdev,
                                      struct drm_file *file_priv,
                                      u32 domain,
                                      size_t size,
                                      struct qxl_surface *surf,
-                                     struct qxl_bo **qobj,
+                                     struct drm_gem_object **gobj,
                                      uint32_t *handle)
 {
-       struct drm_gem_object *gobj;
        int r;
+       struct drm_gem_object *local_gobj;
 
-       BUG_ON(!qobj);
        BUG_ON(!handle);
 
        r = qxl_gem_object_create(qdev, size, 0,
                                  domain,
                                  false, false, surf,
-                                 &gobj);
+                                 &local_gobj);
        if (r)
                return -ENOMEM;
-       r = drm_gem_handle_create(file_priv, gobj, handle);
+       r = drm_gem_handle_create(file_priv, local_gobj, handle);
        if (r)
                return r;
-       /* drop reference from allocate - handle holds it now */
-       *qobj = gem_to_qxl_bo(gobj);
-       drm_gem_object_put(gobj);
+
+       if (gobj)
+               *gobj = local_gobj;
+       else
+               /* drop reference from allocate - handle holds it now */
+               drm_gem_object_put(local_gobj);
+
        return 0;
 }
 
index 30f58b2..dd0f834 100644 (file)
@@ -38,7 +38,6 @@ int qxl_alloc_ioctl(struct drm_device *dev, void *data, struct drm_file *file_pr
        struct qxl_device *qdev = to_qxl(dev);
        struct drm_qxl_alloc *qxl_alloc = data;
        int ret;
-       struct qxl_bo *qobj;
        uint32_t handle;
        u32 domain = QXL_GEM_DOMAIN_VRAM;
 
@@ -50,7 +49,7 @@ int qxl_alloc_ioctl(struct drm_device *dev, void *data, struct drm_file *file_pr
                                                domain,
                                                qxl_alloc->size,
                                                NULL,
-                                               &qobj, &handle);
+                                               NULL, &handle);
        if (ret) {
                DRM_ERROR("%s: failed to create gem ret=%d\n",
                          __func__, ret);
@@ -386,7 +385,6 @@ int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 {
        struct qxl_device *qdev = to_qxl(dev);
        struct drm_qxl_alloc_surf *param = data;
-       struct qxl_bo *qobj;
        int handle;
        int ret;
        int size, actual_stride;
@@ -406,7 +404,7 @@ int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
                                                QXL_GEM_DOMAIN_SURFACE,
                                                size,
                                                &surf,
-                                               &qobj, &handle);
+                                               NULL, &handle);
        if (ret) {
                DRM_ERROR("%s: failed to create gem ret=%d\n",
                          __func__, ret);
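A caller sketch (not part of this diff) showing the two ownership modes of the reworked qxl_gem_object_create_with_handle(): with a non-NULL gobj the caller keeps the allocation reference and must drop it itself, whereas passing NULL lets the helper drop it once the handle holds one, as the ioctl paths above now do. qxl_demo_create() is a hypothetical function and assumes the qxl driver headers.

/* Hypothetical caller, for illustration only. */
static int qxl_demo_create(struct qxl_device *qdev, struct drm_file *file_priv,
			   size_t size, uint32_t *handle)
{
	struct drm_gem_object *gobj;
	int ret;

	/* Mode 1: the caller wants the object back and owns an extra reference. */
	ret = qxl_gem_object_create_with_handle(qdev, file_priv,
						QXL_GEM_DOMAIN_CPU, size,
						NULL, &gobj, handle);
	if (ret)
		return ret;
	gem_to_qxl_bo(gobj)->is_dumb = true;	/* e.g. touch the bo */
	drm_gem_object_put(gobj);		/* drop the caller's reference */

	/* Mode 2: only the handle is needed; the helper drops the extra reference. */
	return qxl_gem_object_create_with_handle(qdev, file_priv,
						 QXL_GEM_DOMAIN_CPU, size,
						 NULL, NULL, handle);
}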
index a530ecc..bf34498 100644 (file)
@@ -833,12 +833,12 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
         * need align with 2 pixel.
         */
        if (fb->format->is_yuv && ((new_plane_state->src.x1 >> 16) % 2)) {
-               DRM_ERROR("Invalid Source: Yuv format not support odd xpos\n");
+               DRM_DEBUG_KMS("Invalid Source: Yuv format not support odd xpos\n");
                return -EINVAL;
        }
 
        if (fb->format->is_yuv && new_plane_state->rotation & DRM_MODE_REFLECT_Y) {
-               DRM_ERROR("Invalid Source: Yuv format does not support this rotation\n");
+               DRM_DEBUG_KMS("Invalid Source: Yuv format does not support this rotation\n");
                return -EINVAL;
        }
 
@@ -846,7 +846,7 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
                struct vop *vop = to_vop(crtc);
 
                if (!vop->data->afbc) {
-                       DRM_ERROR("vop does not support AFBC\n");
+                       DRM_DEBUG_KMS("vop does not support AFBC\n");
                        return -EINVAL;
                }
 
@@ -855,15 +855,16 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
                        return ret;
 
                if (new_plane_state->src.x1 || new_plane_state->src.y1) {
-                       DRM_ERROR("AFBC does not support offset display, xpos=%d, ypos=%d, offset=%d\n",
-                                 new_plane_state->src.x1,
-                                 new_plane_state->src.y1, fb->offsets[0]);
+                       DRM_DEBUG_KMS("AFBC does not support offset display, " \
+                                     "xpos=%d, ypos=%d, offset=%d\n",
+                                     new_plane_state->src.x1, new_plane_state->src.y1,
+                                     fb->offsets[0]);
                        return -EINVAL;
                }
 
                if (new_plane_state->rotation && new_plane_state->rotation != DRM_MODE_ROTATE_0) {
-                       DRM_ERROR("No rotation support in AFBC, rotation=%d\n",
-                                 new_plane_state->rotation);
+                       DRM_DEBUG_KMS("No rotation support in AFBC, rotation=%d\n",
+                                     new_plane_state->rotation);
                        return -EINVAL;
                }
        }
index 82094c1..c438535 100644 (file)
@@ -497,10 +497,9 @@ static int vmw_user_bo_synccpu_release(struct drm_file *filp,
                if (!(flags & drm_vmw_synccpu_allow_cs)) {
                        atomic_dec(&vmw_bo->cpu_writers);
                }
-               ttm_bo_put(&vmw_bo->tbo);
+               vmw_user_bo_unref(vmw_bo);
        }
 
-       drm_gem_object_put(&vmw_bo->tbo.base);
        return ret;
 }
 
@@ -540,8 +539,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
                        return ret;
 
                ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
-               vmw_bo_unreference(&vbo);
-               drm_gem_object_put(&vbo->tbo.base);
+               vmw_user_bo_unref(vbo);
                if (unlikely(ret != 0)) {
                        if (ret == -ERESTARTSYS || ret == -EBUSY)
                                return -EBUSY;
index 50a836e..1d433fc 100644 (file)
@@ -195,6 +195,14 @@ static inline struct vmw_bo *vmw_bo_reference(struct vmw_bo *buf)
        return buf;
 }
 
+static inline void vmw_user_bo_unref(struct vmw_bo *vbo)
+{
+       if (vbo) {
+               ttm_bo_put(&vbo->tbo);
+               drm_gem_object_put(&vbo->tbo.base);
+       }
+}
+
 static inline struct vmw_bo *to_vmw_bo(struct drm_gem_object *gobj)
 {
        return container_of((gobj), struct vmw_bo, tbo.base);
index 3810a99..58bfdf2 100644 (file)
@@ -1513,4 +1513,16 @@ static inline bool vmw_has_fences(struct vmw_private *vmw)
        return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0;
 }
 
+static inline bool vmw_shadertype_is_valid(enum vmw_sm_type shader_model,
+                                          u32 shader_type)
+{
+       SVGA3dShaderType max_allowed = SVGA3D_SHADERTYPE_PREDX_MAX;
+
+       if (shader_model >= VMW_SM_5)
+               max_allowed = SVGA3D_SHADERTYPE_MAX;
+       else if (shader_model >= VMW_SM_4)
+               max_allowed = SVGA3D_SHADERTYPE_DX10_MAX;
+       return shader_type >= SVGA3D_SHADERTYPE_MIN && shader_type < max_allowed;
+}
+
 #endif
index 6b9aa2b..98e0723 100644 (file)
@@ -1164,8 +1164,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
        }
        vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB);
        ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
-       ttm_bo_put(&vmw_bo->tbo);
-       drm_gem_object_put(&vmw_bo->tbo.base);
+       vmw_user_bo_unref(vmw_bo);
        if (unlikely(ret != 0))
                return ret;
 
@@ -1221,8 +1220,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
        vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM,
                             VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM);
        ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
-       ttm_bo_put(&vmw_bo->tbo);
-       drm_gem_object_put(&vmw_bo->tbo.base);
+       vmw_user_bo_unref(vmw_bo);
        if (unlikely(ret != 0))
                return ret;
 
@@ -1992,7 +1990,7 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv,
 
        cmd = container_of(header, typeof(*cmd), header);
 
-       if (cmd->body.type >= SVGA3D_SHADERTYPE_PREDX_MAX) {
+       if (!vmw_shadertype_is_valid(VMW_SM_LEGACY, cmd->body.type)) {
                VMW_DEBUG_USER("Illegal shader type %u.\n",
                               (unsigned int) cmd->body.type);
                return -EINVAL;
@@ -2115,8 +2113,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
                                      SVGA3dCmdHeader *header)
 {
        VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetSingleConstantBuffer);
-       SVGA3dShaderType max_shader_num = has_sm5_context(dev_priv) ?
-               SVGA3D_NUM_SHADERTYPE : SVGA3D_NUM_SHADERTYPE_DX10;
 
        struct vmw_resource *res = NULL;
        struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
@@ -2133,6 +2129,14 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
        if (unlikely(ret != 0))
                return ret;
 
+       if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type) ||
+           cmd->body.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
+               VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
+                              (unsigned int) cmd->body.type,
+                              (unsigned int) cmd->body.slot);
+               return -EINVAL;
+       }
+
        binding.bi.ctx = ctx_node->ctx;
        binding.bi.res = res;
        binding.bi.bt = vmw_ctx_binding_cb;
@@ -2141,14 +2145,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
        binding.size = cmd->body.sizeInBytes;
        binding.slot = cmd->body.slot;
 
-       if (binding.shader_slot >= max_shader_num ||
-           binding.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
-               VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
-                              (unsigned int) cmd->body.type,
-                              (unsigned int) binding.slot);
-               return -EINVAL;
-       }
-
        vmw_binding_add(ctx_node->staged, &binding.bi, binding.shader_slot,
                        binding.slot);
 
@@ -2207,15 +2203,13 @@ static int vmw_cmd_dx_set_shader_res(struct vmw_private *dev_priv,
 {
        VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShaderResources) =
                container_of(header, typeof(*cmd), header);
-       SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
-               SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
 
        u32 num_sr_view = (cmd->header.size - sizeof(cmd->body)) /
                sizeof(SVGA3dShaderResourceViewId);
 
        if ((u64) cmd->body.startView + (u64) num_sr_view >
            (u64) SVGA3D_DX_MAX_SRVIEWS ||
-           cmd->body.type >= max_allowed) {
+           !vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
                VMW_DEBUG_USER("Invalid shader binding.\n");
                return -EINVAL;
        }
@@ -2239,8 +2233,6 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
                                 SVGA3dCmdHeader *header)
 {
        VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShader);
-       SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
-               SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
        struct vmw_resource *res = NULL;
        struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
        struct vmw_ctx_bindinfo_shader binding;
@@ -2251,8 +2243,7 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
 
        cmd = container_of(header, typeof(*cmd), header);
 
-       if (cmd->body.type >= max_allowed ||
-           cmd->body.type < SVGA3D_SHADERTYPE_MIN) {
+       if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
                VMW_DEBUG_USER("Illegal shader type %u.\n",
                               (unsigned int) cmd->body.type);
                return -EINVAL;
index b62207b..1489ad7 100644 (file)
@@ -1665,10 +1665,8 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 
 err_out:
        /* vmw_user_lookup_handle takes one ref so does new_fb */
-       if (bo) {
-               vmw_bo_unreference(&bo);
-               drm_gem_object_put(&bo->tbo.base);
-       }
+       if (bo)
+               vmw_user_bo_unref(bo);
        if (surface)
                vmw_surface_unreference(&surface);
 
index 7e11231..fb85f24 100644 (file)
@@ -451,8 +451,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data,
 
        ret = vmw_overlay_update_stream(dev_priv, buf, arg, true);
 
-       vmw_bo_unreference(&buf);
-       drm_gem_object_put(&buf->tbo.base);
+       vmw_user_bo_unref(buf);
 
 out_unlock:
        mutex_unlock(&overlay->mutex);
index e7226db..1e81ff2 100644 (file)
@@ -809,8 +809,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv,
                                    shader_type, num_input_sig,
                                    num_output_sig, tfile, shader_handle);
 out_bad_arg:
-       vmw_bo_unreference(&buffer);
-       drm_gem_object_put(&buffer->tbo.base);
+       vmw_user_bo_unref(buffer);
        return ret;
 }
 
index a997dbc..0238078 100644 (file)
 
 #include <linux/crc16.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 #include <linux/hid.h>
 #include <linux/hwmon.h>
 #include <linux/jiffies.h>
+#include <linux/ktime.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/seq_file.h>
@@ -63,6 +65,8 @@ static const char *const aqc_device_names[] = {
 #define CTRL_REPORT_ID                 0x03
 #define AQUAERO_CTRL_REPORT_ID         0x0b
 
+#define CTRL_REPORT_DELAY              200     /* ms */
+
 /* The HID report that the official software always sends
  * after writing values, currently same for all devices
  */
@@ -527,6 +531,9 @@ struct aqc_data {
        int secondary_ctrl_report_size;
        u8 *secondary_ctrl_report;
 
+       ktime_t last_ctrl_report_op;
+       int ctrl_report_delay;  /* Delay between two ctrl report operations, in ms */
+
        int buffer_size;
        u8 *buffer;
        int checksum_start;
@@ -611,17 +618,35 @@ static int aqc_aquastreamxt_convert_fan_rpm(u16 val)
        return 0;
 }
 
+static void aqc_delay_ctrl_report(struct aqc_data *priv)
+{
+       /*
+        * If previous read or write is too close to this one, delay the current operation
+        * to give the device enough time to process the previous one.
+        */
+       if (priv->ctrl_report_delay) {
+               s64 delta = ktime_ms_delta(ktime_get(), priv->last_ctrl_report_op);
+
+               if (delta < priv->ctrl_report_delay)
+                       msleep(priv->ctrl_report_delay - delta);
+       }
+}
+
 /* Expects the mutex to be locked */
 static int aqc_get_ctrl_data(struct aqc_data *priv)
 {
        int ret;
 
+       aqc_delay_ctrl_report(priv);
+
        memset(priv->buffer, 0x00, priv->buffer_size);
        ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size,
                                 HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
        if (ret < 0)
                ret = -ENODATA;
 
+       priv->last_ctrl_report_op = ktime_get();
+
        return ret;
 }
 
@@ -631,6 +656,8 @@ static int aqc_send_ctrl_data(struct aqc_data *priv)
        int ret;
        u16 checksum;
 
+       aqc_delay_ctrl_report(priv);
+
        /* Checksum is not needed for Aquaero */
        if (priv->kind != aquaero) {
                /* Init and xorout value for CRC-16/USB is 0xffff */
@@ -646,12 +673,16 @@ static int aqc_send_ctrl_data(struct aqc_data *priv)
        ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size,
                                 HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
        if (ret < 0)
-               return ret;
+               goto record_access_and_ret;
 
        /* The official software sends this report after every change, so do it here as well */
        ret = hid_hw_raw_request(priv->hdev, priv->secondary_ctrl_report_id,
                                 priv->secondary_ctrl_report, priv->secondary_ctrl_report_size,
                                 HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+
+record_access_and_ret:
+       priv->last_ctrl_report_op = ktime_get();
+
        return ret;
 }
 
@@ -1524,6 +1555,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
                priv->buffer_size = AQUAERO_CTRL_REPORT_SIZE;
                priv->temp_ctrl_offset = AQUAERO_TEMP_CTRL_OFFSET;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->temp_label = label_temp_sensors;
                priv->virtual_temp_label = label_virtual_temp_sensors;
@@ -1547,6 +1579,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
                priv->temp_ctrl_offset = D5NEXT_TEMP_CTRL_OFFSET;
 
                priv->buffer_size = D5NEXT_CTRL_REPORT_SIZE;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->power_cycle_count_offset = D5NEXT_POWER_CYCLES;
 
@@ -1597,6 +1630,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
                priv->temp_ctrl_offset = OCTO_TEMP_CTRL_OFFSET;
 
                priv->buffer_size = OCTO_CTRL_REPORT_SIZE;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->power_cycle_count_offset = OCTO_POWER_CYCLES;
 
@@ -1624,6 +1658,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
                priv->temp_ctrl_offset = QUADRO_TEMP_CTRL_OFFSET;
 
                priv->buffer_size = QUADRO_CTRL_REPORT_SIZE;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->flow_pulses_ctrl_offset = QUADRO_FLOW_PULSES_CTRL_OFFSET;
                priv->power_cycle_count_offset = QUADRO_POWER_CYCLES;
index fa5070a..7c5f4b1 100644 (file)
 enum chips {pfe1100, pfe3000};
 
 /*
- * Disable status check for pfe3000 devices, because some devices report
- * communication error (invalid command) for VOUT_MODE command (0x20)
- * although correct VOUT_MODE (0x16) is returned: it leads to incorrect
- * exponent in linear mode.
+ * Disable status check because some devices report communication error
+ * (invalid command) for VOUT_MODE command (0x20) although the correct
+ * VOUT_MODE (0x16) is returned: it leads to incorrect exponent in linear
+ * mode.
+ * This affects both pfe3000 and pfe1100.
  */
-static struct pmbus_platform_data pfe3000_plat_data = {
+static struct pmbus_platform_data pfe_plat_data = {
        .flags = PMBUS_SKIP_STATUS_CHECK,
 };
 
@@ -94,16 +95,15 @@ static int pfe_pmbus_probe(struct i2c_client *client)
        int model;
 
        model = (int)i2c_match_id(pfe_device_id, client)->driver_data;
+       client->dev.platform_data = &pfe_plat_data;
 
        /*
         * PFE3000-12-069RA devices may not stay in page 0 during device
         * probe which leads to probe failure (read status word failed).
         * So let's set the device to page 0 at the beginning.
         */
-       if (model == pfe3000) {
-               client->dev.platform_data = &pfe3000_plat_data;
+       if (model == pfe3000)
                i2c_smbus_write_byte_data(client, PMBUS_PAGE, 0);
-       }
 
        return pmbus_do_probe(client, &pfe_driver_info[model]);
 }
index 2d8342f..05c8068 100644 (file)
@@ -233,13 +233,14 @@ static inline u32 iproc_i2c_rd_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
                                   u32 offset)
 {
        u32 val;
+       unsigned long flags;
 
        if (iproc_i2c->idm_base) {
-               spin_lock(&iproc_i2c->idm_lock);
+               spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
                writel(iproc_i2c->ape_addr_mask,
                       iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
                val = readl(iproc_i2c->base + offset);
-               spin_unlock(&iproc_i2c->idm_lock);
+               spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
        } else {
                val = readl(iproc_i2c->base + offset);
        }
@@ -250,12 +251,14 @@ static inline u32 iproc_i2c_rd_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
 static inline void iproc_i2c_wr_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
                                    u32 offset, u32 val)
 {
+       unsigned long flags;
+
        if (iproc_i2c->idm_base) {
-               spin_lock(&iproc_i2c->idm_lock);
+               spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
                writel(iproc_i2c->ape_addr_mask,
                       iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
                writel(val, iproc_i2c->base + offset);
-               spin_unlock(&iproc_i2c->idm_lock);
+               spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
        } else {
                writel(val, iproc_i2c->base + offset);
        }
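A generic sketch (not part of this diff) of the spin_lock_irqsave() form the accessors above switch to; it is the standard pattern when a lock may also be contended from hard-IRQ context on the same CPU, since taking it with interrupts left enabled could deadlock. demo_lock and demo_touch_reg() are hypothetical names.

#include <linux/io.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

static void demo_touch_reg(void __iomem *base, u32 val)
{
	unsigned long flags;

	/* Save and disable local interrupts for the duration of the lock. */
	spin_lock_irqsave(&demo_lock, flags);
	writel(val, base);
	spin_unlock_irqrestore(&demo_lock, flags);
}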
index 3bfd7a2..24bef00 100644 (file)
@@ -588,9 +588,21 @@ i2c_dw_read(struct dw_i2c_dev *dev)
                        u32 flags = msgs[dev->msg_read_idx].flags;
 
                        regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
+                       tmp &= DW_IC_DATA_CMD_DAT;
                        /* Ensure length byte is a valid value */
-                       if (flags & I2C_M_RECV_LEN &&
-                           (tmp & DW_IC_DATA_CMD_DAT) <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
+                       if (flags & I2C_M_RECV_LEN) {
+                               /*
+                                * If IC_EMPTYFIFO_HOLD_MASTER_EN is set, which cannot be
+                                * detected from the registers, the controller can only be
+                                * disabled once the STOP bit has been set. In the
+                                * I2C_FUNC_SMBUS_BLOCK_DATA case that bit is only set after
+                                * the block data response length has been received, so when
+                                * that length is invalid we must read one more byte with the
+                                * STOP bit set to complete the transaction.
+                                */
+                               if (!tmp || tmp > I2C_SMBUS_BLOCK_MAX)
+                                       tmp = 1;
+
                                len = i2c_dw_recv_len(dev, tmp);
                        }
                        *buf++ = tmp;
index e067671..0980c77 100644 (file)
@@ -330,6 +330,14 @@ static irqreturn_t hisi_i2c_irq(int irq, void *context)
        struct hisi_i2c_controller *ctlr = context;
        u32 int_stat;
 
+       /*
+        * Don't handle the interrupt if ctlr->completion is NULL. We may
+        * reach here because the interrupt is spurious or the transfer is
+        * started by another port (e.g. firmware) rather than us.
+        */
+       if (!ctlr->completion)
+               return IRQ_NONE;
+
        int_stat = readl(ctlr->iobase + HISI_I2C_INT_MSTAT);
        hisi_i2c_clear_int(ctlr, int_stat);
        if (!(int_stat & HISI_I2C_INT_ALL))
index c3287c8..150d923 100644 (file)
@@ -209,6 +209,9 @@ static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
        lpi2c_imx_set_mode(lpi2c_imx);
 
        clk_rate = clk_get_rate(lpi2c_imx->clks[0].clk);
+       if (!clk_rate)
+               return -EINVAL;
+
        if (lpi2c_imx->mode == HS || lpi2c_imx->mode == ULTRA_FAST)
                filt = 0;
        else
index ad8270c..fa6020d 100644 (file)
@@ -250,7 +250,8 @@ static int p2wi_probe(struct platform_device *pdev)
 
        p2wi->rstc = devm_reset_control_get_exclusive(dev, NULL);
        if (IS_ERR(p2wi->rstc)) {
-               dev_err(dev, "failed to retrieve reset controller: %d\n", ret);
+               dev_err(dev, "failed to retrieve reset controller: %pe\n",
+                       p2wi->rstc);
                return PTR_ERR(p2wi->rstc);
        }
 
index bcbbf23..03fc10b 100644 (file)
@@ -442,7 +442,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev)
        if (IS_VI(i2c_dev))
                return 0;
 
-       if (!i2c_dev->hw->has_apb_dma) {
+       if (i2c_dev->hw->has_apb_dma) {
                if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) {
                        dev_dbg(i2c_dev->dev, "APB DMA support not enabled\n");
                        return 0;
@@ -460,6 +460,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev)
        i2c_dev->dma_chan = dma_request_chan(i2c_dev->dev, "tx");
        if (IS_ERR(i2c_dev->dma_chan)) {
                err = PTR_ERR(i2c_dev->dma_chan);
+               i2c_dev->dma_chan = NULL;
                goto err_out;
        }
 
index 8685e0b..7bc3ebf 100644 (file)
@@ -62,7 +62,6 @@
 #define AD7192_MODE_STA_MASK   BIT(20) /* Status Register transmission Mask */
 #define AD7192_MODE_CLKSRC(x)  (((x) & 0x3) << 18) /* Clock Source Select */
 #define AD7192_MODE_SINC3      BIT(15) /* SINC3 Filter Select */
-#define AD7192_MODE_ACX                BIT(14) /* AC excitation enable(AD7195 only)*/
 #define AD7192_MODE_ENPAR      BIT(13) /* Parity Enable */
 #define AD7192_MODE_CLKDIV     BIT(12) /* Clock divide by 2 (AD7190/2 only)*/
 #define AD7192_MODE_SCYCLE     BIT(11) /* Single cycle conversion */
@@ -91,6 +90,7 @@
 /* Configuration Register Bit Designations (AD7192_REG_CONF) */
 
 #define AD7192_CONF_CHOP       BIT(23) /* CHOP enable */
+#define AD7192_CONF_ACX                BIT(22) /* AC excitation enable(AD7195 only) */
 #define AD7192_CONF_REFSEL     BIT(20) /* REFIN1/REFIN2 Reference Select */
 #define AD7192_CONF_CHAN(x)    ((x) << 8) /* Channel select */
 #define AD7192_CONF_CHAN_MASK  (0x7FF << 8) /* Channel select mask */
@@ -472,7 +472,7 @@ static ssize_t ad7192_show_ac_excitation(struct device *dev,
        struct iio_dev *indio_dev = dev_to_iio_dev(dev);
        struct ad7192_state *st = iio_priv(indio_dev);
 
-       return sysfs_emit(buf, "%d\n", !!(st->mode & AD7192_MODE_ACX));
+       return sysfs_emit(buf, "%d\n", !!(st->conf & AD7192_CONF_ACX));
 }
 
 static ssize_t ad7192_show_bridge_switch(struct device *dev,
@@ -513,13 +513,13 @@ static ssize_t ad7192_set(struct device *dev,
 
                ad_sd_write_reg(&st->sd, AD7192_REG_GPOCON, 1, st->gpocon);
                break;
-       case AD7192_REG_MODE:
+       case AD7192_REG_CONF:
                if (val)
-                       st->mode |= AD7192_MODE_ACX;
+                       st->conf |= AD7192_CONF_ACX;
                else
-                       st->mode &= ~AD7192_MODE_ACX;
+                       st->conf &= ~AD7192_CONF_ACX;
 
-               ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode);
+               ad_sd_write_reg(&st->sd, AD7192_REG_CONF, 3, st->conf);
                break;
        default:
                ret = -EINVAL;
@@ -579,12 +579,11 @@ static IIO_DEVICE_ATTR(bridge_switch_en, 0644,
 
 static IIO_DEVICE_ATTR(ac_excitation_en, 0644,
                       ad7192_show_ac_excitation, ad7192_set,
-                      AD7192_REG_MODE);
+                      AD7192_REG_CONF);
 
 static struct attribute *ad7192_attributes[] = {
        &iio_dev_attr_filter_low_pass_3db_frequency_available.dev_attr.attr,
        &iio_dev_attr_bridge_switch_en.dev_attr.attr,
-       &iio_dev_attr_ac_excitation_en.dev_attr.attr,
        NULL
 };
 
@@ -595,6 +594,7 @@ static const struct attribute_group ad7192_attribute_group = {
 static struct attribute *ad7195_attributes[] = {
        &iio_dev_attr_filter_low_pass_3db_frequency_available.dev_attr.attr,
        &iio_dev_attr_bridge_switch_en.dev_attr.attr,
+       &iio_dev_attr_ac_excitation_en.dev_attr.attr,
        NULL
 };
 
index 213526c..aea83f3 100644 (file)
@@ -124,6 +124,7 @@ static const struct regmap_config ina2xx_regmap_config = {
 enum ina2xx_ids { ina219, ina226 };
 
 struct ina2xx_config {
+       const char *name;
        u16 config_default;
        int calibration_value;
        int shunt_voltage_lsb;  /* nV */
@@ -155,6 +156,7 @@ struct ina2xx_chip_info {
 
 static const struct ina2xx_config ina2xx_config[] = {
        [ina219] = {
+               .name = "ina219",
                .config_default = INA219_CONFIG_DEFAULT,
                .calibration_value = 4096,
                .shunt_voltage_lsb = 10000,
@@ -164,6 +166,7 @@ static const struct ina2xx_config ina2xx_config[] = {
                .chip_id = ina219,
        },
        [ina226] = {
+               .name = "ina226",
                .config_default = INA226_CONFIG_DEFAULT,
                .calibration_value = 2048,
                .shunt_voltage_lsb = 2500,
@@ -996,7 +999,7 @@ static int ina2xx_probe(struct i2c_client *client)
        /* Patch the current config register with default. */
        val = chip->config->config_default;
 
-       if (id->driver_data == ina226) {
+       if (type == ina226) {
                ina226_set_average(chip, INA226_DEFAULT_AVG, &val);
                ina226_set_int_time_vbus(chip, INA226_DEFAULT_IT, &val);
                ina226_set_int_time_vshunt(chip, INA226_DEFAULT_IT, &val);
@@ -1015,7 +1018,7 @@ static int ina2xx_probe(struct i2c_client *client)
        }
 
        indio_dev->modes = INDIO_DIRECT_MODE;
-       if (id->driver_data == ina226) {
+       if (type == ina226) {
                indio_dev->channels = ina226_channels;
                indio_dev->num_channels = ARRAY_SIZE(ina226_channels);
                indio_dev->info = &ina226_info;
@@ -1024,7 +1027,7 @@ static int ina2xx_probe(struct i2c_client *client)
                indio_dev->num_channels = ARRAY_SIZE(ina219_channels);
                indio_dev->info = &ina219_info;
        }
-       indio_dev->name = id->name;
+       indio_dev->name = id ? id->name : chip->config->name;
 
        ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev,
                                          &ina2xx_setup_ops);
index af6bfcc..eb78a6f 100644 (file)
@@ -916,12 +916,6 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev)
                goto err_vref;
        }
 
-       ret = clk_prepare_enable(priv->core_clk);
-       if (ret) {
-               dev_err(dev, "failed to enable core clk\n");
-               goto err_core_clk;
-       }
-
        regval = FIELD_PREP(MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, 1);
        regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG0,
                           MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, regval);
@@ -948,8 +942,6 @@ err_adc_clk:
        regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3,
                           MESON_SAR_ADC_REG3_ADC_EN, 0);
        meson_sar_adc_set_bandgap(indio_dev, false);
-       clk_disable_unprepare(priv->core_clk);
-err_core_clk:
        regulator_disable(priv->vref);
 err_vref:
        meson_sar_adc_unlock(indio_dev);
@@ -977,8 +969,6 @@ static void meson_sar_adc_hw_disable(struct iio_dev *indio_dev)
 
        meson_sar_adc_set_bandgap(indio_dev, false);
 
-       clk_disable_unprepare(priv->core_clk);
-
        regulator_disable(priv->vref);
 
        if (!ret)
@@ -1211,7 +1201,7 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
        if (IS_ERR(priv->clkin))
                return dev_err_probe(dev, PTR_ERR(priv->clkin), "failed to get clkin\n");
 
-       priv->core_clk = devm_clk_get(dev, "core");
+       priv->core_clk = devm_clk_get_enabled(dev, "core");
        if (IS_ERR(priv->core_clk))
                return dev_err_probe(dev, PTR_ERR(priv->core_clk), "failed to get core clk\n");
 
@@ -1294,15 +1284,26 @@ static int meson_sar_adc_remove(struct platform_device *pdev)
 static int meson_sar_adc_suspend(struct device *dev)
 {
        struct iio_dev *indio_dev = dev_get_drvdata(dev);
+       struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
 
        meson_sar_adc_hw_disable(indio_dev);
 
+       clk_disable_unprepare(priv->core_clk);
+
        return 0;
 }
 
 static int meson_sar_adc_resume(struct device *dev)
 {
        struct iio_dev *indio_dev = dev_get_drvdata(dev);
+       struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
+       int ret;
+
+       ret = clk_prepare_enable(priv->core_clk);
+       if (ret) {
+               dev_err(dev, "failed to enable core clk\n");
+               return ret;
+       }
 
        return meson_sar_adc_hw_enable(indio_dev);
 }
index 943e9e1..b72d39f 100644 (file)
@@ -253,7 +253,7 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
        platform_set_drvdata(pdev, indio_dev);
 
        state->ec = ec->ec_dev;
-       state->msg = devm_kzalloc(&pdev->dev,
+       state->msg = devm_kzalloc(&pdev->dev, sizeof(*state->msg) +
                                max((u16)sizeof(struct ec_params_motion_sense),
                                state->ec->max_response), GFP_KERNEL);
        if (!state->msg)
index 9bf8337..8c8e0bb 100644 (file)
@@ -344,9 +344,12 @@ static int admv1013_update_quad_filters(struct admv1013_state *st)
 
 static int admv1013_update_mixer_vgate(struct admv1013_state *st)
 {
-       unsigned int vcm, mixer_vgate;
+       unsigned int mixer_vgate;
+       int vcm;
 
        vcm = regulator_get_voltage(st->reg);
+       if (vcm < 0)
+               return vcm;
 
        if (vcm < 1800000)
                mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100;
index 6a18b36..b6e6b1d 100644 (file)
@@ -2687,7 +2687,7 @@ unknown_format:
 static int lsm6dsx_get_acpi_mount_matrix(struct device *dev,
                                          struct iio_mount_matrix *orientation)
 {
-       return false;
+       return -EOPNOTSUPP;
 }
 
 #endif
index c117f50..adcba83 100644 (file)
@@ -1888,7 +1888,7 @@ static const struct iio_buffer_setup_ops noop_ring_setup_ops;
 int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod)
 {
        struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
-       struct fwnode_handle *fwnode;
+       struct fwnode_handle *fwnode = NULL;
        int ret;
 
        if (!indio_dev->info)
@@ -1899,7 +1899,8 @@ int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod)
        /* If the calling driver did not initialize firmware node, do it here */
        if (dev_fwnode(&indio_dev->dev))
                fwnode = dev_fwnode(&indio_dev->dev);
-       else
+       /* The default dummy IIO device has no parent */
+       else if (indio_dev->dev.parent)
                fwnode = dev_fwnode(indio_dev->dev.parent);
        device_set_node(&indio_dev->dev, fwnode);
 
index 489902b..b50bf89 100644 (file)
@@ -190,7 +190,7 @@ static const struct iio_itime_sel_mul bu27008_itimes[] = {
        .address = BU27008_REG_##data##_LO,                                     \
        .scan_index = BU27008_##color,                                          \
        .scan_type = {                                                          \
-               .sign = 's',                                                    \
+               .sign = 'u',                                                    \
                .realbits = 16,                                                 \
                .storagebits = 16,                                              \
                .endianness = IIO_LE,                                           \
@@ -633,7 +633,7 @@ static int bu27008_try_find_new_time_gain(struct bu27008_data *data, int val,
        for (i = 0; i < data->gts.num_itime; i++) {
                new_time_sel = data->gts.itime_table[i].sel;
                ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts,
-                                       new_time_sel, val, val2 * 1000, gain_sel);
+                                       new_time_sel, val, val2, gain_sel);
                if (!ret)
                        break;
        }
@@ -662,7 +662,7 @@ static int bu27008_set_scale(struct bu27008_data *data,
                goto unlock_out;
 
        ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts, time_sel,
-                                               val, val2 * 1000, &gain_sel);
+                                               val, val2, &gain_sel);
        if (ret) {
                ret = bu27008_try_find_new_time_gain(data, val, val2, &gain_sel);
                if (ret)
@@ -677,6 +677,21 @@ unlock_out:
        return ret;
 }
 
+static int bu27008_write_raw_get_fmt(struct iio_dev *indio_dev,
+                                    struct iio_chan_spec const *chan,
+                                    long mask)
+{
+
+       switch (mask) {
+       case IIO_CHAN_INFO_SCALE:
+               return IIO_VAL_INT_PLUS_NANO;
+       case IIO_CHAN_INFO_INT_TIME:
+               return IIO_VAL_INT_PLUS_MICRO;
+       default:
+               return -EINVAL;
+       }
+}
+
 static int bu27008_write_raw(struct iio_dev *idev,
                             struct iio_chan_spec const *chan,
                             int val, int val2, long mask)
@@ -756,6 +771,7 @@ static int bu27008_update_scan_mode(struct iio_dev *idev,
 static const struct iio_info bu27008_info = {
        .read_raw = &bu27008_read_raw,
        .write_raw = &bu27008_write_raw,
+       .write_raw_get_fmt = &bu27008_write_raw_get_fmt,
        .read_avail = &bu27008_read_avail,
        .update_scan_mode = bu27008_update_scan_mode,
        .validate_trigger = iio_validate_own_trigger,
index e63ef57..bf3de85 100644 (file)
@@ -575,7 +575,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan,
                return -EINVAL;
 
        if (chan == BU27034_CHAN_ALS) {
-               if (val == 0 && val2 == 1000)
+               if (val == 0 && val2 == 1000000)
                        return 0;
 
                return -EINVAL;
@@ -587,7 +587,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan,
                goto unlock_out;
 
        ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts, time_sel,
-                                               val, val2 * 1000, &gain_sel);
+                                               val, val2, &gain_sel);
        if (ret) {
                /*
                 * Could not support scale with given time. Need to change time.
@@ -624,7 +624,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan,
 
                        /* Can we provide requested scale with this time? */
                        ret = iio_gts_find_gain_sel_for_scale_using_time(
-                               &data->gts, new_time_sel, val, val2 * 1000,
+                               &data->gts, new_time_sel, val, val2,
                                &gain_sel);
                        if (ret)
                                continue;
@@ -1217,6 +1217,21 @@ static int bu27034_read_raw(struct iio_dev *idev,
        }
 }
 
+static int bu27034_write_raw_get_fmt(struct iio_dev *indio_dev,
+                                    struct iio_chan_spec const *chan,
+                                    long mask)
+{
+
+       switch (mask) {
+       case IIO_CHAN_INFO_SCALE:
+               return IIO_VAL_INT_PLUS_NANO;
+       case IIO_CHAN_INFO_INT_TIME:
+               return IIO_VAL_INT_PLUS_MICRO;
+       default:
+               return -EINVAL;
+       }
+}
+
 static int bu27034_write_raw(struct iio_dev *idev,
                             struct iio_chan_spec const *chan,
                             int val, int val2, long mask)
@@ -1267,6 +1282,7 @@ static int bu27034_read_avail(struct iio_dev *idev,
 static const struct iio_info bu27034_info = {
        .read_raw = &bu27034_read_raw,
        .write_raw = &bu27034_write_raw,
+       .write_raw_get_fmt = &bu27034_write_raw_get_fmt,
        .read_avail = &bu27034_read_avail,
 };
 
index 755a9c5..f9ab671 100644 (file)
@@ -85,6 +85,8 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
        dma_addr_t mask;
        int i;
 
+       umem->iova = va = virt;
+
        if (umem->is_odp) {
                unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
 
@@ -100,7 +102,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
         */
        pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
 
-       umem->iova = va = virt;
        /* The best result is the smallest page size that results in the minimum
         * number of required pages. Compute the largest page size that could
         * work based on VA address bits that don't change.
index b42166f..63e98e2 100644 (file)
@@ -1253,6 +1253,8 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
 
        rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
        if (rc) {
+               bnxt_unregister_dev(rdev->en_dev);
+               clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
                ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
                return -EINVAL;
        }
@@ -1526,8 +1528,8 @@ static void bnxt_re_remove(struct auxiliary_device *adev)
        }
        bnxt_re_setup_cc(rdev, false);
        ib_unregister_device(&rdev->ibdev);
-       ib_dealloc_device(&rdev->ibdev);
        bnxt_re_dev_uninit(rdev);
+       ib_dealloc_device(&rdev->ibdev);
 skip_remove:
        mutex_unlock(&bnxt_re_mutex);
 }
index 5fd8f7c..739d942 100644 (file)
@@ -819,6 +819,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
        }
 
        memset((u8 *)dpit->tbl, 0xFF, bytes);
+       mutex_init(&res->dpi_tbl_lock);
        dpit->priv_db = dpit->ucreg.bar_reg + dpit->ucreg.offset;
 
        return 0;
index 9dbb89e..baaa440 100644 (file)
@@ -12307,6 +12307,7 @@ static void free_cntrs(struct hfi1_devdata *dd)
 
        if (dd->synth_stats_timer.function)
                del_timer_sync(&dd->synth_stats_timer);
+       cancel_work_sync(&dd->update_cntr_work);
        ppd = (struct hfi1_pportdata *)(dd + 1);
        for (i = 0; i < dd->num_pports; i++, ppd++) {
                kfree(ppd->cntrs);
index 8f385f9..d5f2a6b 100644 (file)
@@ -83,6 +83,11 @@ static void bcm_aggregate(struct qcom_icc_bcm *bcm)
 
                temp = agg_peak[bucket] * bcm->vote_scale;
                bcm->vote_y[bucket] = bcm_div(temp, bcm->aux_data.unit);
+
+               if (bcm->enable_mask && (bcm->vote_x[bucket] || bcm->vote_y[bucket])) {
+                       bcm->vote_x[bucket] = 0;
+                       bcm->vote_y[bucket] = bcm->enable_mask;
+               }
        }
 
        if (bcm->keepalive && bcm->vote_x[QCOM_ICC_BUCKET_AMC] == 0 &&
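A worked example (not part of this diff) mirroring the new enable_mask clamp above: for a BCM declared with .enable_mask = 0x8 (such as ACV below), any nonzero aggregated vote collapses into the enable bit instead of a scaled bandwidth value. The demo_* names are illustrative.

#include <linux/types.h>

static void demo_apply_enable_mask(u64 *vote_x, u64 *vote_y, u32 enable_mask)
{
	/* Mirrors the clamp added to bcm_aggregate() above. */
	if (enable_mask && (*vote_x || *vote_y)) {
		*vote_x = 0;
		*vote_y = enable_mask;	/* e.g. an ACV vote becomes just 0x8 */
	}
}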
index 04391c1..7843d88 100644 (file)
@@ -81,6 +81,7 @@ struct qcom_icc_node {
  * @vote_x: aggregated threshold values, represents sum_bw when @type is bw bcm
  * @vote_y: aggregated threshold values, represents peak_bw when @type is bw bcm
  * @vote_scale: scaling factor for vote_x and vote_y
+ * @enable_mask: optional mask to send as vote instead of vote_x/vote_y
  * @dirty: flag used to indicate whether the bcm needs to be committed
  * @keepalive: flag used to indicate whether a keepalive is required
  * @aux_data: auxiliary data used when calculating threshold values and
@@ -97,6 +98,7 @@ struct qcom_icc_bcm {
        u64 vote_x[QCOM_ICC_NUM_BUCKETS];
        u64 vote_y[QCOM_ICC_NUM_BUCKETS];
        u64 vote_scale;
+       u32 enable_mask;
        bool dirty;
        bool keepalive;
        struct bcm_db aux_data;
index da21cc3..f565386 100644 (file)
@@ -1873,6 +1873,7 @@ static struct qcom_icc_node srvc_snoc = {
 
 static struct qcom_icc_bcm bcm_acv = {
        .name = "ACV",
+       .enable_mask = 0x8,
        .num_nodes = 1,
        .nodes = { &ebi },
 };
index 2d7a8e7..e64c214 100644 (file)
@@ -1337,6 +1337,7 @@ static struct qcom_icc_node qns_mem_noc_sf_disp = {
 
 static struct qcom_icc_bcm bcm_acv = {
        .name = "ACV",
+       .enable_mask = 0x8,
        .num_nodes = 1,
        .nodes = { &ebi },
 };
@@ -1349,6 +1350,7 @@ static struct qcom_icc_bcm bcm_ce0 = {
 
 static struct qcom_icc_bcm bcm_cn0 = {
        .name = "CN0",
+       .enable_mask = 0x1,
        .keepalive = true,
        .num_nodes = 55,
        .nodes = { &qnm_gemnoc_cnoc, &qnm_gemnoc_pcie,
@@ -1383,6 +1385,7 @@ static struct qcom_icc_bcm bcm_cn0 = {
 
 static struct qcom_icc_bcm bcm_co0 = {
        .name = "CO0",
+       .enable_mask = 0x1,
        .num_nodes = 2,
        .nodes = { &qxm_nsp, &qns_nsp_gemnoc },
 };
@@ -1403,6 +1406,7 @@ static struct qcom_icc_bcm bcm_mm0 = {
 
 static struct qcom_icc_bcm bcm_mm1 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 12,
        .nodes = { &qnm_camnoc_hf, &qnm_camnoc_icp,
                   &qnm_camnoc_sf, &qnm_mdp,
@@ -1445,6 +1449,7 @@ static struct qcom_icc_bcm bcm_sh0 = {
 
 static struct qcom_icc_bcm bcm_sh1 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 7,
        .nodes = { &alm_gpu_tcu, &alm_sys_tcu,
                   &qnm_nsp_gemnoc, &qnm_pcie,
@@ -1461,6 +1466,7 @@ static struct qcom_icc_bcm bcm_sn0 = {
 
 static struct qcom_icc_bcm bcm_sn1 = {
        .name = "SN1",
+       .enable_mask = 0x1,
        .num_nodes = 4,
        .nodes = { &qhm_gic, &qxm_pimem,
                   &xm_gic, &qns_gemnoc_gc },
@@ -1492,6 +1498,7 @@ static struct qcom_icc_bcm bcm_sn7 = {
 
 static struct qcom_icc_bcm bcm_acv_disp = {
        .name = "ACV",
+       .enable_mask = 0x1,
        .num_nodes = 1,
        .nodes = { &ebi_disp },
 };
@@ -1510,6 +1517,7 @@ static struct qcom_icc_bcm bcm_mm0_disp = {
 
 static struct qcom_icc_bcm bcm_mm1_disp = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qnm_mdp_disp, &qnm_rot_disp,
                   &qns_mem_noc_sf_disp },
@@ -1523,6 +1531,7 @@ static struct qcom_icc_bcm bcm_sh0_disp = {
 
 static struct qcom_icc_bcm bcm_sh1_disp = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 1,
        .nodes = { &qnm_pcie_disp },
 };
index d823ba9..0864ed2 100644 (file)
@@ -1473,6 +1473,7 @@ static struct qcom_icc_node qns_mem_noc_sf_cam_ife_2 = {
 
 static struct qcom_icc_bcm bcm_acv = {
        .name = "ACV",
+       .enable_mask = 0x8,
        .num_nodes = 1,
        .nodes = { &ebi },
 };
@@ -1485,6 +1486,7 @@ static struct qcom_icc_bcm bcm_ce0 = {
 
 static struct qcom_icc_bcm bcm_cn0 = {
        .name = "CN0",
+       .enable_mask = 0x1,
        .keepalive = true,
        .num_nodes = 54,
        .nodes = { &qsm_cfg, &qhs_ahb2phy0,
@@ -1524,6 +1526,7 @@ static struct qcom_icc_bcm bcm_cn1 = {
 
 static struct qcom_icc_bcm bcm_co0 = {
        .name = "CO0",
+       .enable_mask = 0x1,
        .num_nodes = 2,
        .nodes = { &qxm_nsp, &qns_nsp_gemnoc },
 };
@@ -1549,6 +1552,7 @@ static struct qcom_icc_bcm bcm_mm0 = {
 
 static struct qcom_icc_bcm bcm_mm1 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 8,
        .nodes = { &qnm_camnoc_hf, &qnm_camnoc_icp,
                   &qnm_camnoc_sf, &qnm_vapss_hcp,
@@ -1589,6 +1593,7 @@ static struct qcom_icc_bcm bcm_sh0 = {
 
 static struct qcom_icc_bcm bcm_sh1 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 13,
        .nodes = { &alm_gpu_tcu, &alm_sys_tcu,
                   &chm_apps, &qnm_gpu,
@@ -1608,6 +1613,7 @@ static struct qcom_icc_bcm bcm_sn0 = {
 
 static struct qcom_icc_bcm bcm_sn1 = {
        .name = "SN1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qhm_gic, &xm_gic,
                   &qns_gemnoc_gc },
@@ -1633,6 +1639,7 @@ static struct qcom_icc_bcm bcm_sn7 = {
 
 static struct qcom_icc_bcm bcm_acv_disp = {
        .name = "ACV",
+       .enable_mask = 0x1,
        .num_nodes = 1,
        .nodes = { &ebi_disp },
 };
@@ -1657,12 +1664,14 @@ static struct qcom_icc_bcm bcm_sh0_disp = {
 
 static struct qcom_icc_bcm bcm_sh1_disp = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 2,
        .nodes = { &qnm_mnoc_hf_disp, &qnm_pcie_disp },
 };
 
 static struct qcom_icc_bcm bcm_acv_cam_ife_0 = {
        .name = "ACV",
+       .enable_mask = 0x0,
        .num_nodes = 1,
        .nodes = { &ebi_cam_ife_0 },
 };
@@ -1681,6 +1690,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_0 = {
 
 static struct qcom_icc_bcm bcm_mm1_cam_ife_0 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 4,
        .nodes = { &qnm_camnoc_hf_cam_ife_0, &qnm_camnoc_icp_cam_ife_0,
                   &qnm_camnoc_sf_cam_ife_0, &qns_mem_noc_sf_cam_ife_0 },
@@ -1694,6 +1704,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_0 = {
 
 static struct qcom_icc_bcm bcm_sh1_cam_ife_0 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qnm_mnoc_hf_cam_ife_0, &qnm_mnoc_sf_cam_ife_0,
                   &qnm_pcie_cam_ife_0 },
@@ -1701,6 +1712,7 @@ static struct qcom_icc_bcm bcm_sh1_cam_ife_0 = {
 
 static struct qcom_icc_bcm bcm_acv_cam_ife_1 = {
        .name = "ACV",
+       .enable_mask = 0x0,
        .num_nodes = 1,
        .nodes = { &ebi_cam_ife_1 },
 };
@@ -1719,6 +1731,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_1 = {
 
 static struct qcom_icc_bcm bcm_mm1_cam_ife_1 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 4,
        .nodes = { &qnm_camnoc_hf_cam_ife_1, &qnm_camnoc_icp_cam_ife_1,
                   &qnm_camnoc_sf_cam_ife_1, &qns_mem_noc_sf_cam_ife_1 },
@@ -1732,6 +1745,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_1 = {
 
 static struct qcom_icc_bcm bcm_sh1_cam_ife_1 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qnm_mnoc_hf_cam_ife_1, &qnm_mnoc_sf_cam_ife_1,
                   &qnm_pcie_cam_ife_1 },
@@ -1739,6 +1753,7 @@ static struct qcom_icc_bcm bcm_sh1_cam_ife_1 = {
 
 static struct qcom_icc_bcm bcm_acv_cam_ife_2 = {
        .name = "ACV",
+       .enable_mask = 0x0,
        .num_nodes = 1,
        .nodes = { &ebi_cam_ife_2 },
 };
@@ -1757,6 +1772,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_2 = {
 
 static struct qcom_icc_bcm bcm_mm1_cam_ife_2 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 4,
        .nodes = { &qnm_camnoc_hf_cam_ife_2, &qnm_camnoc_icp_cam_ife_2,
                   &qnm_camnoc_sf_cam_ife_2, &qns_mem_noc_sf_cam_ife_2 },
@@ -1770,6 +1786,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_2 = {
 
 static struct qcom_icc_bcm bcm_sh1_cam_ife_2 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qnm_mnoc_hf_cam_ife_2, &qnm_mnoc_sf_cam_ife_2,
                   &qnm_pcie_cam_ife_2 },
index fa09d51..baf3125 100644 (file)
@@ -247,7 +247,7 @@ extern void dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp);
 extern int dsp_cmx_conf(struct dsp *dsp, u32 conf_id);
 extern void dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb);
 extern void dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb);
-extern void dsp_cmx_send(void *arg);
+extern void dsp_cmx_send(struct timer_list *arg);
 extern void dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb);
 extern int dsp_cmx_del_conf_member(struct dsp *dsp);
 extern int dsp_cmx_del_conf(struct dsp_conf *conf);
index 357b875..61cb45c 100644 (file)
@@ -1614,7 +1614,7 @@ static u16        dsp_count; /* last sample count */
 static int     dsp_count_valid; /* if we have last sample count */
 
 void
-dsp_cmx_send(void *arg)
+dsp_cmx_send(struct timer_list *arg)
 {
        struct dsp_conf *conf;
        struct dsp_conf_member *member;
index 3860845..fae95f1 100644 (file)
@@ -1195,7 +1195,7 @@ static int __init dsp_init(void)
        }
 
        /* set sample timer */
-       timer_setup(&dsp_spl_tl, (void *)dsp_cmx_send, 0);
+       timer_setup(&dsp_spl_tl, dsp_cmx_send, 0);
        dsp_spl_tl.expires = jiffies + dsp_tics;
        dsp_spl_jiffies = dsp_spl_tl.expires;
        add_timer(&dsp_spl_tl);
index c9bc5a9..03c58e5 100644 (file)
@@ -406,15 +406,15 @@ static ssize_t interval_store(struct device *dev,
 
 static DEVICE_ATTR_RW(interval);
 
-static ssize_t hw_control_show(struct device *dev,
-                              struct device_attribute *attr, char *buf)
+static ssize_t offloaded_show(struct device *dev,
+                             struct device_attribute *attr, char *buf)
 {
        struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
 
        return sprintf(buf, "%d\n", trigger_data->hw_control);
 }
 
-static DEVICE_ATTR_RO(hw_control);
+static DEVICE_ATTR_RO(offloaded);
 
 static struct attribute *netdev_trig_attrs[] = {
        &dev_attr_device_name.attr,
@@ -427,7 +427,7 @@ static struct attribute *netdev_trig_attrs[] = {
        &dev_attr_rx.attr,
        &dev_attr_tx.attr,
        &dev_attr_interval.attr,
-       &dev_attr_hw_control.attr,
+       &dev_attr_offloaded.attr,
        NULL
 };
 ATTRIBUTE_GROUPS(netdev_trig);
index 40cb3cb..60425c9 100644 (file)
@@ -1310,6 +1310,8 @@ static int mtk_jpeg_probe(struct platform_device *pdev)
        jpeg->dev = &pdev->dev;
        jpeg->variant = of_device_get_match_data(jpeg->dev);
 
+       platform_set_drvdata(pdev, jpeg);
+
        ret = devm_of_platform_populate(&pdev->dev);
        if (ret) {
                v4l2_err(&jpeg->v4l2_dev, "Master of platform populate failed.");
@@ -1381,8 +1383,6 @@ static int mtk_jpeg_probe(struct platform_device *pdev)
                  jpeg->variant->dev_name, jpeg->vdev->num,
                  VIDEO_MAJOR, jpeg->vdev->minor);
 
-       platform_set_drvdata(pdev, jpeg);
-
        pm_runtime_enable(&pdev->dev);
 
        return 0;
index 9ff439a..315e97a 100644 (file)
@@ -821,6 +821,8 @@ static int vb2ops_venc_queue_setup(struct vb2_queue *vq,
                return -EINVAL;
 
        if (*nplanes) {
+               if (*nplanes != q_data->fmt->num_planes)
+                       return -EINVAL;
                for (i = 0; i < *nplanes; i++)
                        if (sizes[i] < q_data->sizeimage[i])
                                return -EINVAL;
index 0bd2613..791bde6 100644 (file)
@@ -9,7 +9,9 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/math.h>
 #include <linux/mfd/syscon.h>
+#include <linux/minmax.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_graph.h>
@@ -1137,8 +1139,9 @@ __imx7_csi_video_try_fmt(struct v4l2_pix_format *pixfmt,
         * TODO: Implement configurable stride support.
         */
        walign = 8 * 8 / cc->bpp;
-       v4l_bound_align_image(&pixfmt->width, 1, 0xffff, walign,
-                             &pixfmt->height, 1, 0xffff, 1, 0);
+       pixfmt->width = clamp(round_up(pixfmt->width, walign), walign,
+                             round_down(65535U, walign));
+       pixfmt->height = clamp(pixfmt->height, 1U, 65535U);
 
        pixfmt->bytesperline = pixfmt->width * cc->bpp / 8;
        pixfmt->sizeimage = pixfmt->bytesperline * pixfmt->height;
index 7f0802a..3418d2d 100644 (file)
@@ -251,8 +251,8 @@ int pkt_session_unset_buffers(struct hfi_session_release_buffer_pkt *pkt,
 
                pkt->extradata_size = 0;
                pkt->shdr.hdr.size =
-                       struct_size((struct hfi_session_set_buffers_pkt *)0,
-                                   buffer_info, bd->num_buffers);
+                       struct_size_t(struct hfi_session_set_buffers_pkt,
+                                     buffer_info, bd->num_buffers);
        }
 
        pkt->response_req = bd->response_required;
index 5ac2a42..f4988f0 100644 (file)
@@ -45,7 +45,7 @@ static int uvc_control_add_xu_mapping(struct uvc_video_chain *chain,
        map->menu_names = NULL;
        map->menu_mapping = NULL;
 
-       map->menu_mask = BIT_MASK(xmap->menu_count);
+       map->menu_mask = GENMASK(xmap->menu_count - 1, 0);
 
        size = xmap->menu_count * sizeof(*map->menu_mapping);
        map->menu_mapping = kzalloc(size, GFP_KERNEL);
index d676cf6..3dae5e3 100644 (file)
@@ -195,7 +195,7 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
                }
        }
 
-       if (option->force_clkreq_0)
+       if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
                rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
        else
index cfebad5..f4ab094 100644 (file)
@@ -435,17 +435,10 @@ static void rts5228_init_from_cfg(struct rtsx_pcr *pcr)
                        option->ltr_enabled = false;
                }
        }
-
-       if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
-                               | PM_L1_1_EN | PM_L1_2_EN))
-               option->force_clkreq_0 = false;
-       else
-               option->force_clkreq_0 = true;
 }
 
 static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
 {
-       struct rtsx_cr_option *option = &pcr->option;
 
        rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1,
                        CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
@@ -476,17 +469,6 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
        else
                rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
 
-       /*
-        * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
-        * to drive low, and we forcibly request clock.
-        */
-       if (option->force_clkreq_0)
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
-       else
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
-
        rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
 
        if (pcr->rtd3_en) {
index 91d240d..47ab72a 100644 (file)
@@ -327,12 +327,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr)
                }
        }
 
-
        /*
         * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
         * to drive low, and we forcibly request clock.
         */
-       if (option->force_clkreq_0)
+       if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
                rtsx_pci_write_register(pcr, PETXCFG,
                        FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
        else
index 9b42b20..79b18f6 100644 (file)
@@ -517,17 +517,10 @@ static void rts5260_init_from_cfg(struct rtsx_pcr *pcr)
                        option->ltr_enabled = false;
                }
        }
-
-       if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
-                               | PM_L1_1_EN | PM_L1_2_EN))
-               option->force_clkreq_0 = false;
-       else
-               option->force_clkreq_0 = true;
 }
 
 static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
 {
-       struct rtsx_cr_option *option = &pcr->option;
 
        /* Set mcu_cnt to 7 to ensure data can be sampled properly */
        rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07);
@@ -546,17 +539,6 @@ static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
 
        rts5260_init_hw(pcr);
 
-       /*
-        * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
-        * to drive low, and we forcibly request clock.
-        */
-       if (option->force_clkreq_0)
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
-       else
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
-
        rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
 
        return 0;
index b1e7603..94af6bf 100644 (file)
@@ -498,17 +498,10 @@ static void rts5261_init_from_cfg(struct rtsx_pcr *pcr)
                        option->ltr_enabled = false;
                }
        }
-
-       if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
-                               | PM_L1_1_EN | PM_L1_2_EN))
-               option->force_clkreq_0 = false;
-       else
-               option->force_clkreq_0 = true;
 }
 
 static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
 {
-       struct rtsx_cr_option *option = &pcr->option;
        u32 val;
 
        rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1,
@@ -554,17 +547,6 @@ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
        else
                rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
 
-       /*
-        * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
-        * to drive low, and we forcibly request clock.
-        */
-       if (option->force_clkreq_0)
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
-       else
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
-
        rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
 
        if (pcr->rtd3_en) {
index 32b7783..a3f4b52 100644 (file)
@@ -1326,8 +1326,11 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
                        return err;
        }
 
-       if (pcr->aspm_mode == ASPM_MODE_REG)
+       if (pcr->aspm_mode == ASPM_MODE_REG) {
                rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30);
+               rtsx_pci_write_register(pcr, PETXCFG,
+                               FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+       }
 
        /* No CD interrupt if probing driver with card inserted.
         * So we need to initialize pcr->card_exist here.
index b488f70..05e2c15 100644 (file)
@@ -13,6 +13,8 @@
 
 #include <linux/mfd/tps6594.h>
 
+#define TPS6594_DEV_REV_1 0x08
+
 static irqreturn_t tps6594_esm_isr(int irq, void *dev_id)
 {
        struct platform_device *pdev = dev_id;
@@ -32,11 +34,26 @@ static int tps6594_esm_probe(struct platform_device *pdev)
 {
        struct tps6594 *tps = dev_get_drvdata(pdev->dev.parent);
        struct device *dev = &pdev->dev;
+       unsigned int rev;
        int irq;
        int ret;
        int i;
 
-       for (i = 0 ; i < pdev->num_resources ; i++) {
+       /*
+        * Due to a bug in revision 1 of the PMIC, the GPIO3 used for the
+        * SoC ESM function is used to power the load switch instead.
+        * As a consequence, ESM cannot be used on those PMICs.
+        * Check the version and return an error in case of revision 1.
+        */
+       ret = regmap_read(tps->regmap, TPS6594_REG_DEV_REV, &rev);
+       if (ret)
+               return dev_err_probe(dev, ret,
+                                    "Failed to read PMIC revision\n");
+       if (rev == TPS6594_DEV_REV_1)
+               return dev_err_probe(dev, -ENODEV,
+                             "ESM not supported for revision 1 PMIC\n");
+
+       for (i = 0; i < pdev->num_resources; i++) {
                irq = platform_get_irq_byname(pdev, pdev->resource[i].name);
                if (irq < 0)
                        return dev_err_probe(dev, irq, "Failed to get %s irq\n",
index f701efb..b6f4be2 100644 (file)
@@ -2097,14 +2097,14 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq,
        mmc_blk_urgent_bkops(mq, mqrq);
 }
 
-static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
+static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, enum mmc_issue_type issue_type)
 {
        unsigned long flags;
        bool put_card;
 
        spin_lock_irqsave(&mq->lock, flags);
 
-       mq->in_flight[mmc_issue_type(mq, req)] -= 1;
+       mq->in_flight[issue_type] -= 1;
 
        put_card = (mmc_tot_in_flight(mq) == 0);
 
@@ -2117,6 +2117,7 @@ static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
 static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req,
                                bool can_sleep)
 {
+       enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
        struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
        struct mmc_request *mrq = &mqrq->brq.mrq;
        struct mmc_host *host = mq->card->host;
@@ -2136,7 +2137,7 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req,
                        blk_mq_complete_request(req);
        }
 
-       mmc_blk_mq_dec_in_flight(mq, req);
+       mmc_blk_mq_dec_in_flight(mq, issue_type);
 }
 
 void mmc_blk_mq_recovery(struct mmc_queue *mq)
index 2d002c8..d0d6ffc 100644 (file)
@@ -338,13 +338,7 @@ static void moxart_transfer_pio(struct moxart_host *host)
                                return;
                        }
                        for (len = 0; len < remain && len < host->fifo_width;) {
-                               /* SCR data must be read in big endian. */
-                               if (data->mrq->cmd->opcode == SD_APP_SEND_SCR)
-                                       *sgp = ioread32be(host->base +
-                                                         REG_DATA_WINDOW);
-                               else
-                                       *sgp = ioread32(host->base +
-                                                       REG_DATA_WINDOW);
+                               *sgp = ioread32(host->base + REG_DATA_WINDOW);
                                sgp++;
                                len += 4;
                        }
index a202a69..3215063 100644 (file)
@@ -29,9 +29,16 @@ struct f_sdhost_priv {
        bool enable_cmd_dat_delay;
 };
 
+static void *sdhci_f_sdhost_priv(struct sdhci_host *host)
+{
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+       return sdhci_pltfm_priv(pltfm_host);
+}
+
 static void sdhci_f_sdh30_soft_voltage_switch(struct sdhci_host *host)
 {
-       struct f_sdhost_priv *priv = sdhci_priv(host);
+       struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
        u32 ctrl = 0;
 
        usleep_range(2500, 3000);
@@ -64,7 +71,7 @@ static unsigned int sdhci_f_sdh30_get_min_clock(struct sdhci_host *host)
 
 static void sdhci_f_sdh30_reset(struct sdhci_host *host, u8 mask)
 {
-       struct f_sdhost_priv *priv = sdhci_priv(host);
+       struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
        u32 ctl;
 
        if (sdhci_readw(host, SDHCI_CLOCK_CONTROL) == 0)
@@ -95,30 +102,32 @@ static const struct sdhci_ops sdhci_f_sdh30_ops = {
        .set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
+static const struct sdhci_pltfm_data sdhci_f_sdh30_pltfm_data = {
+       .ops = &sdhci_f_sdh30_ops,
+       .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC
+               | SDHCI_QUIRK_INVERTED_WRITE_PROTECT,
+       .quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE
+               |  SDHCI_QUIRK2_TUNING_WORK_AROUND,
+};
+
 static int sdhci_f_sdh30_probe(struct platform_device *pdev)
 {
        struct sdhci_host *host;
        struct device *dev = &pdev->dev;
-       int irq, ctrl = 0, ret = 0;
+       int ctrl = 0, ret = 0;
        struct f_sdhost_priv *priv;
+       struct sdhci_pltfm_host *pltfm_host;
        u32 reg = 0;
 
-       irq = platform_get_irq(pdev, 0);
-       if (irq < 0)
-               return irq;
-
-       host = sdhci_alloc_host(dev, sizeof(struct f_sdhost_priv));
+       host = sdhci_pltfm_init(pdev, &sdhci_f_sdh30_pltfm_data,
+                               sizeof(struct f_sdhost_priv));
        if (IS_ERR(host))
                return PTR_ERR(host);
 
-       priv = sdhci_priv(host);
+       pltfm_host = sdhci_priv(host);
+       priv = sdhci_pltfm_priv(pltfm_host);
        priv->dev = dev;
 
-       host->quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
-                      SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
-       host->quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE |
-                       SDHCI_QUIRK2_TUNING_WORK_AROUND;
-
        priv->enable_cmd_dat_delay = device_property_read_bool(dev,
                                                "fujitsu,cmd-dat-delay-select");
 
@@ -126,18 +135,6 @@ static int sdhci_f_sdh30_probe(struct platform_device *pdev)
        if (ret)
                goto err;
 
-       platform_set_drvdata(pdev, host);
-
-       host->hw_name = "f_sdh30";
-       host->ops = &sdhci_f_sdh30_ops;
-       host->irq = irq;
-
-       host->ioaddr = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(host->ioaddr)) {
-               ret = PTR_ERR(host->ioaddr);
-               goto err;
-       }
-
        if (dev_of_node(dev)) {
                sdhci_get_of_property(pdev);
 
@@ -204,24 +201,24 @@ err_rst:
 err_clk:
        clk_disable_unprepare(priv->clk_iface);
 err:
-       sdhci_free_host(host);
+       sdhci_pltfm_free(pdev);
+
        return ret;
 }
 
 static int sdhci_f_sdh30_remove(struct platform_device *pdev)
 {
        struct sdhci_host *host = platform_get_drvdata(pdev);
-       struct f_sdhost_priv *priv = sdhci_priv(host);
+       struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
+       struct clk *clk_iface = priv->clk_iface;
+       struct reset_control *rst = priv->rst;
+       struct clk *clk = priv->clk;
 
-       sdhci_remove_host(host, readl(host->ioaddr + SDHCI_INT_STATUS) ==
-                         0xffffffff);
-
-       reset_control_assert(priv->rst);
-       clk_disable_unprepare(priv->clk);
-       clk_disable_unprepare(priv->clk_iface);
+       sdhci_pltfm_unregister(pdev);
 
-       sdhci_free_host(host);
-       platform_set_drvdata(pdev, NULL);
+       reset_control_assert(rst);
+       clk_disable_unprepare(clk);
+       clk_disable_unprepare(clk_iface);
 
        return 0;
 }
index db5e0dc..2bdebeb 100644 (file)
@@ -863,11 +863,9 @@ static int spmmc_drv_probe(struct platform_device *pdev)
        struct spmmc_host *host;
        int ret = 0;
 
-       mmc = mmc_alloc_host(sizeof(*host), &pdev->dev);
-       if (!mmc) {
-               ret = -ENOMEM;
-               goto probe_free_host;
-       }
+       mmc = devm_mmc_alloc_host(&pdev->dev, sizeof(struct spmmc_host));
+       if (!mmc)
+               return -ENOMEM;
 
        host = mmc_priv(mmc);
        host->mmc = mmc;
@@ -902,7 +900,7 @@ static int spmmc_drv_probe(struct platform_device *pdev)
 
        ret = mmc_of_parse(mmc);
        if (ret)
-               goto probe_free_host;
+               goto clk_disable;
 
        mmc->ops = &spmmc_ops;
        mmc->f_min = SPMMC_MIN_CLK;
@@ -911,7 +909,7 @@ static int spmmc_drv_probe(struct platform_device *pdev)
 
        ret = mmc_regulator_get_supply(mmc);
        if (ret)
-               goto probe_free_host;
+               goto clk_disable;
 
        if (!mmc->ocr_avail)
                mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
@@ -927,14 +925,17 @@ static int spmmc_drv_probe(struct platform_device *pdev)
        host->tuning_info.enable_tuning = 1;
        pm_runtime_set_active(&pdev->dev);
        pm_runtime_enable(&pdev->dev);
-       mmc_add_host(mmc);
+       ret = mmc_add_host(mmc);
+       if (ret)
+               goto pm_disable;
 
-       return ret;
+       return 0;
 
-probe_free_host:
-       if (mmc)
-               mmc_free_host(mmc);
+pm_disable:
+       pm_runtime_disable(&pdev->dev);
 
+clk_disable:
+       clk_disable_unprepare(host->clk);
        return ret;
 }
 
@@ -948,7 +949,6 @@ static int spmmc_drv_remove(struct platform_device *dev)
        pm_runtime_put_noidle(&dev->dev);
        pm_runtime_disable(&dev->dev);
        platform_set_drvdata(dev, NULL);
-       mmc_free_host(host->mmc);
 
        return 0;
 }
index 521af92..bf2a92f 100644 (file)
@@ -1705,8 +1705,6 @@ static int wbsd_init(struct device *dev, int base, int irq, int dma,
 
                wbsd_release_resources(host);
                wbsd_free_mmc(dev);
-
-               mmc_free_host(mmc);
                return ret;
        }
 
index b9dbad3..fc5da5d 100644 (file)
@@ -660,10 +660,10 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
                return NULL;
        arp = (struct arp_pkt *)skb_network_header(skb);
 
-       /* Don't modify or load balance ARPs that do not originate locally
-        * (e.g.,arrive via a bridge).
+       /* Don't modify or load balance ARPs that do not originate
+        * from the bond itself or a VLAN directly above the bond.
         */
-       if (!bond_slave_has_mac_rx(bond, arp->mac_src))
+       if (!bond_slave_has_mac_rcu(bond, arp->mac_src))
                return NULL;
 
        dev = ip_dev_find(dev_net(bond->dev), arp->ip_src);
index 484c9e3..447b06e 100644 (file)
@@ -5901,7 +5901,9 @@ void bond_setup(struct net_device *bond_dev)
 
        bond_dev->hw_features = BOND_VLAN_FEATURES |
                                NETIF_F_HW_VLAN_CTAG_RX |
-                               NETIF_F_HW_VLAN_CTAG_FILTER;
+                               NETIF_F_HW_VLAN_CTAG_FILTER |
+                               NETIF_F_HW_VLAN_STAG_RX |
+                               NETIF_F_HW_VLAN_STAG_FILTER;
 
        bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
        bond_dev->features |= bond_dev->hw_features;
index 4068d96..98c669a 100644 (file)
@@ -192,12 +192,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
 
                nla_peer = data[VXCAN_INFO_PEER];
                ifmp = nla_data(nla_peer);
-               err = rtnl_nla_parse_ifla(peer_tb,
-                                         nla_data(nla_peer) +
-                                         sizeof(struct ifinfomsg),
-                                         nla_len(nla_peer) -
-                                         sizeof(struct ifinfomsg),
-                                         NULL);
+               err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
                if (err < 0)
                        return err;
 
index 38b3c6d..b8bb9f3 100644 (file)
@@ -1006,6 +1006,10 @@ mt753x_trap_frames(struct mt7530_priv *priv)
        mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK,
                   MT753X_BPDU_CPU_ONLY);
 
+       /* Trap 802.1X PAE frames to the CPU port(s) */
+       mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_PORT_FW_MASK,
+                  MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY));
+
        /* Trap LLDP frames with :0E MAC DA to the CPU port(s) */
        mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK,
                   MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY));
index 08045b0..17e42d3 100644 (file)
@@ -66,6 +66,8 @@ enum mt753x_id {
 /* Registers for BPDU and PAE frame control*/
 #define MT753X_BPC                     0x24
 #define  MT753X_BPDU_PORT_FW_MASK      GENMASK(2, 0)
+#define  MT753X_PAE_PORT_FW_MASK       GENMASK(18, 16)
+#define  MT753X_PAE_PORT_FW(x)         FIELD_PREP(MT753X_PAE_PORT_FW_MASK, x)
 
 /* Register for :03 and :0E MAC DA frame control */
 #define MT753X_RGAC2                   0x2c
index c7d51a5..7af2f08 100644 (file)
@@ -3034,6 +3034,14 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip)
 
        /* If there is a GPIO connected to the reset pin, toggle it */
        if (gpiod) {
+               /* If the switch has just been reset and not yet completed
+                * loading EEPROM, the reset may interrupt the I2C transaction
+                * mid-byte, causing the first EEPROM read after the reset
+                * to come from the wrong location, so the switch boots into
+                * the wrong mode and is left inoperable.
+                */
+               mv88e6xxx_g1_wait_eeprom_done(chip);
+
                gpiod_set_value_cansleep(gpiod, 1);
                usleep_range(10000, 20000);
                gpiod_set_value_cansleep(gpiod, 0);
index 8da46d2..bef879c 100644 (file)
@@ -1625,8 +1625,10 @@ static void felix_teardown(struct dsa_switch *ds)
        struct felix *felix = ocelot_to_felix(ocelot);
        struct dsa_port *dp;
 
+       rtnl_lock();
        if (felix->tag_proto_ops)
                felix->tag_proto_ops->teardown(ds);
+       rtnl_unlock();
 
        dsa_switch_for_each_available_port(dp, ds)
                ocelot_deinit_port(ocelot, dp->index);
index 1c11395..f16daa9 100644 (file)
@@ -1069,6 +1069,9 @@ static u64 vsc9959_tas_remaining_gate_len_ps(u64 gate_len_ns)
        if (gate_len_ns == U64_MAX)
                return U64_MAX;
 
+       if (gate_len_ns < VSC9959_TAS_MIN_GATE_LEN_NS)
+               return 0;
+
        return (gate_len_ns - VSC9959_TAS_MIN_GATE_LEN_NS) * PSEC_PER_NSEC;
 }
 
index 392ec09..3e4fb3c 100644 (file)
@@ -1793,11 +1793,9 @@ static int b44_nway_reset(struct net_device *dev)
        b44_readphy(bp, MII_BMCR, &bmcr);
        b44_readphy(bp, MII_BMCR, &bmcr);
        r = -EINVAL;
-       if (bmcr & BMCR_ANENABLE) {
-               b44_writephy(bp, MII_BMCR,
-                            bmcr | BMCR_ANRESTART);
-               r = 0;
-       }
+       if (bmcr & BMCR_ANENABLE)
+               r = b44_writephy(bp, MII_BMCR,
+                                bmcr | BMCR_ANRESTART);
        spin_unlock_irq(&bp->lock);
 
        return r;
index 10c7c23..52ee375 100644 (file)
@@ -1448,7 +1448,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac)
        int err;
 
        phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
-       if (!phy_dev || IS_ERR(phy_dev)) {
+       if (IS_ERR(phy_dev)) {
                dev_err(bgmac->dev, "Failed to register fixed PHY device\n");
                return -ENODEV;
        }
index 8bcde0a..e2a4e10 100644 (file)
@@ -1508,6 +1508,8 @@ struct bnx2x {
        bool                    cnic_loaded;
        struct cnic_eth_dev     *(*cnic_probe)(struct net_device *);
 
+       bool                    nic_stopped;
+
        /* Flag that indicates that we can start looking for FCoE L2 queue
         * completions in the default status block.
         */
index 6ea5521..e9c1e1b 100644 (file)
@@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        bnx2x_add_all_napi(bp);
        DP(NETIF_MSG_IFUP, "napi added\n");
        bnx2x_napi_enable(bp);
+       bp->nic_stopped = false;
 
        if (IS_PF(bp)) {
                /* set pf load just before approaching the MCP */
@@ -2960,6 +2961,7 @@ load_error2:
 load_error1:
        bnx2x_napi_disable(bp);
        bnx2x_del_all_napi(bp);
+       bp->nic_stopped = true;
 
        /* clear pf_load status, as it was already set */
        if (IS_PF(bp))
@@ -3095,14 +3097,17 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
                if (!CHIP_IS_E1x(bp))
                        bnx2x_pf_disable(bp);
 
-               /* Disable HW interrupts, NAPI */
-               bnx2x_netif_stop(bp, 1);
-               /* Delete all NAPI objects */
-               bnx2x_del_all_napi(bp);
-               if (CNIC_LOADED(bp))
-                       bnx2x_del_all_napi_cnic(bp);
-               /* Release IRQs */
-               bnx2x_free_irq(bp);
+               if (!bp->nic_stopped) {
+                       /* Disable HW interrupts, NAPI */
+                       bnx2x_netif_stop(bp, 1);
+                       /* Delete all NAPI objects */
+                       bnx2x_del_all_napi(bp);
+                       if (CNIC_LOADED(bp))
+                               bnx2x_del_all_napi_cnic(bp);
+                       /* Release IRQs */
+                       bnx2x_free_irq(bp);
+                       bp->nic_stopped = true;
+               }
 
                /* Report UNLOAD_DONE to MCP */
                bnx2x_send_unload_done(bp, false);
index 1e7a6f1..0d8e61c 100644 (file)
@@ -9474,15 +9474,18 @@ unload_error:
                }
        }
 
-       /* Disable HW interrupts, NAPI */
-       bnx2x_netif_stop(bp, 1);
-       /* Delete all NAPI objects */
-       bnx2x_del_all_napi(bp);
-       if (CNIC_LOADED(bp))
-               bnx2x_del_all_napi_cnic(bp);
+       if (!bp->nic_stopped) {
+               /* Disable HW interrupts, NAPI */
+               bnx2x_netif_stop(bp, 1);
+               /* Delete all NAPI objects */
+               bnx2x_del_all_napi(bp);
+               if (CNIC_LOADED(bp))
+                       bnx2x_del_all_napi_cnic(bp);
 
-       /* Release IRQs */
-       bnx2x_free_irq(bp);
+               /* Release IRQs */
+               bnx2x_free_irq(bp);
+               bp->nic_stopped = true;
+       }
 
        /* Reset the chip, unless PCI function is offline. If we reach this
         * point following a PCI error handling, it means device is really
@@ -14238,13 +14241,16 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
                }
                bnx2x_drain_tx_queues(bp);
                bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
-               bnx2x_netif_stop(bp, 1);
-               bnx2x_del_all_napi(bp);
+               if (!bp->nic_stopped) {
+                       bnx2x_netif_stop(bp, 1);
+                       bnx2x_del_all_napi(bp);
 
-               if (CNIC_LOADED(bp))
-                       bnx2x_del_all_napi_cnic(bp);
+                       if (CNIC_LOADED(bp))
+                               bnx2x_del_all_napi_cnic(bp);
 
-               bnx2x_free_irq(bp);
+                       bnx2x_free_irq(bp);
+                       bp->nic_stopped = true;
+               }
 
                /* Report UNLOAD_DONE to MCP */
                bnx2x_send_unload_done(bp, true);
index 0657a0f..8946a93 100644 (file)
@@ -529,13 +529,16 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp)
        bnx2x_vfpf_finalize(bp, &req->first_tlv);
 
 free_irq:
-       /* Disable HW interrupts, NAPI */
-       bnx2x_netif_stop(bp, 0);
-       /* Delete all NAPI objects */
-       bnx2x_del_all_napi(bp);
-
-       /* Release IRQs */
-       bnx2x_free_irq(bp);
+       if (!bp->nic_stopped) {
+               /* Disable HW interrupts, NAPI */
+               bnx2x_netif_stop(bp, 0);
+               /* Delete all NAPI objects */
+               bnx2x_del_all_napi(bp);
+
+               /* Release IRQs */
+               bnx2x_free_irq(bp);
+               bp->nic_stopped = true;
+       }
 }
 
 static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf,
index 0092e46..cc3afb6 100644 (file)
@@ -617,7 +617,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
                };
 
                phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
-               if (!phydev || IS_ERR(phydev)) {
+               if (IS_ERR(phydev)) {
                        dev_err(kdev, "failed to register fixed PHY device\n");
                        return -ENODEV;
                }
index 5ef073a..cb2810f 100644 (file)
@@ -6881,7 +6881,10 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 
                        ri->data = NULL;
 
-                       skb = build_skb(data, frag_size);
+                       if (frag_size)
+                               skb = build_skb(data, frag_size);
+                       else
+                               skb = slab_build_skb(data);
                        if (!skb) {
                                tg3_frag_free(frag_size != 0, data);
                                goto drop_it_no_recycle;
index f6a0f12..82929ee 100644 (file)
@@ -5194,6 +5194,9 @@ static int __maybe_unused macb_suspend(struct device *dev)
        unsigned int q;
        int err;
 
+       if (!device_may_wakeup(&bp->dev->dev))
+               phy_exit(bp->sgmii_phy);
+
        if (!netif_running(netdev))
                return 0;
 
@@ -5254,7 +5257,6 @@ static int __maybe_unused macb_suspend(struct device *dev)
        if (!(bp->wol & MACB_WOL_ENABLED)) {
                rtnl_lock();
                phylink_stop(bp->phylink);
-               phy_exit(bp->sgmii_phy);
                rtnl_unlock();
                spin_lock_irqsave(&bp->lock, flags);
                macb_reset_hw(bp);
@@ -5284,6 +5286,9 @@ static int __maybe_unused macb_resume(struct device *dev)
        unsigned int q;
        int err;
 
+       if (!device_may_wakeup(&bp->dev->dev))
+               phy_init(bp->sgmii_phy);
+
        if (!netif_running(netdev))
                return 0;
 
@@ -5344,8 +5349,6 @@ static int __maybe_unused macb_resume(struct device *dev)
        macb_set_rx_mode(netdev);
        macb_restore_features(bp);
        rtnl_lock();
-       if (!device_may_wakeup(&bp->dev->dev))
-               phy_init(bp->sgmii_phy);
 
        phylink_start(bp->phylink);
        rtnl_unlock();
index c2e7037..7750702 100644 (file)
@@ -1466,7 +1466,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
        tp->write_seq = snd_isn;
        tp->snd_nxt = snd_isn;
        tp->snd_una = snd_isn;
-       inet_sk(sk)->inet_id = get_random_u16();
+       atomic_set(&inet_sk(sk)->inet_id, get_random_u16());
        assign_rxopt(sk, opt);
 
        if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
index 1416262..e0a4cb7 100644 (file)
@@ -1186,14 +1186,9 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
 
 static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
 {
-       struct device_node *node = pdev->dev.of_node;
        struct platform_device *ierb_pdev;
        struct device_node *ierb_node;
 
-       /* Don't register with the IERB if the PF itself is disabled */
-       if (!node || !of_device_is_available(node))
-               return 0;
-
        ierb_node = of_find_compatible_node(NULL, NULL,
                                            "fsl,ls1028a-enetc-ierb");
        if (!ierb_node || !of_device_is_available(ierb_node))
@@ -1208,56 +1203,81 @@ static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
        return enetc_ierb_register_pf(ierb_pdev, pdev);
 }
 
-static int enetc_pf_probe(struct pci_dev *pdev,
-                         const struct pci_device_id *ent)
+static struct enetc_si *enetc_psi_create(struct pci_dev *pdev)
 {
-       struct device_node *node = pdev->dev.of_node;
-       struct enetc_ndev_priv *priv;
-       struct net_device *ndev;
        struct enetc_si *si;
-       struct enetc_pf *pf;
        int err;
 
-       err = enetc_pf_register_with_ierb(pdev);
-       if (err == -EPROBE_DEFER)
-               return err;
-       if (err)
-               dev_warn(&pdev->dev,
-                        "Could not register with IERB driver: %pe, please update the device tree\n",
-                        ERR_PTR(err));
-
-       err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
-       if (err)
-               return dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+       err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(struct enetc_pf));
+       if (err) {
+               dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+               goto out;
+       }
 
        si = pci_get_drvdata(pdev);
        if (!si->hw.port || !si->hw.global) {
                err = -ENODEV;
                dev_err(&pdev->dev, "could not map PF space, probing a VF?\n");
-               goto err_map_pf_space;
+               goto out_pci_remove;
        }
 
        err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
                               &si->cbd_ring);
        if (err)
-               goto err_setup_cbdr;
+               goto out_pci_remove;
 
        err = enetc_init_port_rfs_memory(si);
        if (err) {
                dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
-               goto err_init_port_rfs;
+               goto out_teardown_cbdr;
        }
 
        err = enetc_init_port_rss_memory(si);
        if (err) {
                dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
-               goto err_init_port_rss;
+               goto out_teardown_cbdr;
        }
 
-       if (node && !of_device_is_available(node)) {
-               dev_info(&pdev->dev, "device is disabled, skipping\n");
-               err = -ENODEV;
-               goto err_device_disabled;
+       return si;
+
+out_teardown_cbdr:
+       enetc_teardown_cbdr(&si->cbd_ring);
+out_pci_remove:
+       enetc_pci_remove(pdev);
+out:
+       return ERR_PTR(err);
+}
+
+static void enetc_psi_destroy(struct pci_dev *pdev)
+{
+       struct enetc_si *si = pci_get_drvdata(pdev);
+
+       enetc_teardown_cbdr(&si->cbd_ring);
+       enetc_pci_remove(pdev);
+}
+
+static int enetc_pf_probe(struct pci_dev *pdev,
+                         const struct pci_device_id *ent)
+{
+       struct device_node *node = pdev->dev.of_node;
+       struct enetc_ndev_priv *priv;
+       struct net_device *ndev;
+       struct enetc_si *si;
+       struct enetc_pf *pf;
+       int err;
+
+       err = enetc_pf_register_with_ierb(pdev);
+       if (err == -EPROBE_DEFER)
+               return err;
+       if (err)
+               dev_warn(&pdev->dev,
+                        "Could not register with IERB driver: %pe, please update the device tree\n",
+                        ERR_PTR(err));
+
+       si = enetc_psi_create(pdev);
+       if (IS_ERR(si)) {
+               err = PTR_ERR(si);
+               goto err_psi_create;
        }
 
        pf = enetc_si_priv(si);
@@ -1339,15 +1359,9 @@ err_alloc_si_res:
        si->ndev = NULL;
        free_netdev(ndev);
 err_alloc_netdev:
-err_init_port_rss:
-err_init_port_rfs:
-err_device_disabled:
 err_setup_mac_addresses:
-       enetc_teardown_cbdr(&si->cbd_ring);
-err_setup_cbdr:
-err_map_pf_space:
-       enetc_pci_remove(pdev);
-
+       enetc_psi_destroy(pdev);
+err_psi_create:
        return err;
 }
 
@@ -1370,12 +1384,29 @@ static void enetc_pf_remove(struct pci_dev *pdev)
        enetc_free_msix(priv);
 
        enetc_free_si_resources(priv);
-       enetc_teardown_cbdr(&si->cbd_ring);
 
        free_netdev(si->ndev);
 
-       enetc_pci_remove(pdev);
+       enetc_psi_destroy(pdev);
+}
+
+static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
+{
+       struct device_node *node = pdev->dev.of_node;
+       struct enetc_si *si;
+
+       /* Only apply quirk for disabled functions. For the ones
+        * that are enabled, enetc_pf_probe() will apply it.
+        */
+       if (node && of_device_is_available(node))
+               return;
+
+       si = enetc_psi_create(pdev);
+       if (!IS_ERR(si))
+               enetc_psi_destroy(pdev);
 }
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
+                       enetc_fixup_clear_rss_rfs);
 
 static const struct pci_device_id enetc_pf_id_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF) },
index 52546f6..f276b5e 100644 (file)
@@ -464,9 +464,9 @@ static void hns3_dbg_fill_content(char *content, u16 len,
                if (result) {
                        if (item_len < strlen(result[i]))
                                break;
-                       strscpy(pos, result[i], strlen(result[i]));
+                       memcpy(pos, result[i], strlen(result[i]));
                } else {
-                       strscpy(pos, items[i].name, strlen(items[i].name));
+                       memcpy(pos, items[i].name, strlen(items[i].name));
                }
                pos += item_len;
                len -= item_len;
index 9f68900..b7b51e5 100644 (file)
@@ -5854,6 +5854,9 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running)
        if (!if_running)
                return;
 
+       if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+               return;
+
        netif_carrier_off(ndev);
        netif_tx_disable(ndev);
 
@@ -5882,7 +5885,16 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running)
        if (!if_running)
                return;
 
-       hns3_nic_reset_all_ring(priv->ae_handle);
+       if (hns3_nic_resetting(ndev))
+               return;
+
+       if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+               return;
+
+       if (hns3_nic_reset_all_ring(priv->ae_handle))
+               return;
+
+       clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
 
        for (i = 0; i < priv->vector_num; i++)
                hns3_vector_enable(&priv->tqp_vector[i]);
index 409db2e..0fb2eae 100644 (file)
@@ -111,9 +111,9 @@ static void hclge_dbg_fill_content(char *content, u16 len,
                if (result) {
                        if (item_len < strlen(result[i]))
                                break;
-                       strscpy(pos, result[i], strlen(result[i]));
+                       memcpy(pos, result[i], strlen(result[i]));
                } else {
-                       strscpy(pos, items[i].name, strlen(items[i].name));
+                       memcpy(pos, items[i].name, strlen(items[i].name));
                }
                pos += item_len;
                len -= item_len;
index bf675c1..a940e35 100644 (file)
@@ -72,6 +72,8 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev);
 static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
 static void hclge_sync_fd_table(struct hclge_dev *hdev);
 static void hclge_update_fec_stats(struct hclge_dev *hdev);
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+                                     int wait_cnt);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -7558,6 +7560,8 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
 
 static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
 {
+#define HCLGE_LINK_STATUS_WAIT_CNT  3
+
        struct hclge_desc desc;
        struct hclge_config_mac_mode_cmd *req =
                (struct hclge_config_mac_mode_cmd *)desc.data;
@@ -7582,9 +7586,15 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
        req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret)
+       if (ret) {
                dev_err(&hdev->pdev->dev,
                        "mac enable fail, ret =%d.\n", ret);
+               return;
+       }
+
+       if (!enable)
+               hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN,
+                                          HCLGE_LINK_STATUS_WAIT_CNT);
 }
 
 static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
@@ -7647,10 +7657,9 @@ static void hclge_phy_link_status_wait(struct hclge_dev *hdev,
        } while (++i < HCLGE_PHY_LINK_STATUS_NUM);
 }
 
-static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+                                     int wait_cnt)
 {
-#define HCLGE_MAC_LINK_STATUS_NUM  100
-
        int link_status;
        int i = 0;
        int ret;
@@ -7663,13 +7672,15 @@ static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
                        return 0;
 
                msleep(HCLGE_LINK_STATUS_MS);
-       } while (++i < HCLGE_MAC_LINK_STATUS_NUM);
+       } while (++i < wait_cnt);
        return -EBUSY;
 }
 
 static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
                                          bool is_phy)
 {
+#define HCLGE_MAC_LINK_STATUS_NUM  100
+
        int link_ret;
 
        link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
@@ -7677,7 +7688,8 @@ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
        if (is_phy)
                hclge_phy_link_status_wait(hdev, link_ret);
 
-       return hclge_mac_link_status_wait(hdev, link_ret);
+       return hclge_mac_link_status_wait(hdev, link_ret,
+                                         HCLGE_MAC_LINK_STATUS_NUM);
 }
 
 static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
@@ -10915,9 +10927,12 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev)
        u32 rx_pause, tx_pause;
        u8 flowctl;
 
-       if (!phydev->link || !phydev->autoneg)
+       if (!phydev->link)
                return 0;
 
+       if (!phydev->autoneg)
+               return hclge_mac_pause_setup_hw(hdev);
+
        local_advertising = linkmode_adv_to_lcl_adv_t(phydev->advertising);
 
        if (phydev->pause)
index de509e5..c58c312 100644 (file)
@@ -1553,7 +1553,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
        return 0;
 }
 
-static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
 {
        bool tx_en, rx_en;
 
index 45dcfef..53eec6d 100644 (file)
@@ -245,6 +245,7 @@ int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
                           u8 pfc_bitmap);
 int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
 int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev);
 void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
 void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
 int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
index 113fcb3..832a2ae 100644 (file)
@@ -203,7 +203,7 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
        unsigned long offset;
 
        for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
-               asm("dcbfl %0,%1" :: "b" (addr), "r" (offset));
+               asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset));
 }
 
 /* replenish the buffers for a pool.  note that we don't need to
index 763d613..df76cda 100644 (file)
@@ -97,6 +97,8 @@ static int pending_scrq(struct ibmvnic_adapter *,
 static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
                                        struct ibmvnic_sub_crq_queue *);
 static int ibmvnic_poll(struct napi_struct *napi, int data);
+static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
+static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
 static void send_query_map(struct ibmvnic_adapter *adapter);
 static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
 static int send_request_unmap(struct ibmvnic_adapter *, u8);
@@ -114,6 +116,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
 static void free_long_term_buff(struct ibmvnic_adapter *adapter,
                                struct ibmvnic_long_term_buff *ltb);
 static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
+static void flush_reset_queue(struct ibmvnic_adapter *adapter);
 
 struct ibmvnic_stat {
        char name[ETH_GSTRING_LEN];
@@ -1505,8 +1508,8 @@ static const char *adapter_state_to_string(enum vnic_state state)
 
 static int ibmvnic_login(struct net_device *netdev)
 {
+       unsigned long flags, timeout = msecs_to_jiffies(20000);
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       unsigned long timeout = msecs_to_jiffies(20000);
        int retry_count = 0;
        int retries = 10;
        bool retry;
@@ -1527,11 +1530,9 @@ static int ibmvnic_login(struct net_device *netdev)
 
                if (!wait_for_completion_timeout(&adapter->init_done,
                                                 timeout)) {
-                       netdev_warn(netdev, "Login timed out, retrying...\n");
-                       retry = true;
-                       adapter->init_done_rc = 0;
-                       retry_count++;
-                       continue;
+                       netdev_warn(netdev, "Login timed out\n");
+                       adapter->login_pending = false;
+                       goto partial_reset;
                }
 
                if (adapter->init_done_rc == ABORTED) {
@@ -1573,10 +1574,69 @@ static int ibmvnic_login(struct net_device *netdev)
                                            "SCRQ irq initialization failed\n");
                                return rc;
                        }
+               /* Default/timeout error handling, reset and start fresh */
                } else if (adapter->init_done_rc) {
                        netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
                                    adapter->init_done_rc);
-                       return -EIO;
+
+partial_reset:
+                       /* adapter login failed, so free any CRQs or sub-CRQs
+                        * and register again before attempting to log in
+                        * again. If we don't do this then the VIOS may think
+                        * that we are already logged in and reject any
+                        * subsequent attempts.
+                        */
+                       netdev_warn(netdev,
+                                   "Freeing and re-registering CRQs before attempting to login again\n");
+                       retry = true;
+                       adapter->init_done_rc = 0;
+                       release_sub_crqs(adapter, true);
+                       /* Much of this is similar logic to ibmvnic_probe();
+                        * we are essentially re-initializing communication
+                        * with the server. We really should not run any
+                        * resets/failovers here because this is already a form
+                        * of reset and we do not want parallel resets occurring.
+                        */
+                       do {
+                               reinit_init_done(adapter);
+                               /* Clear any failovers we got in the previous
+                                * pass since we are re-initializing the CRQ
+                                */
+                               adapter->failover_pending = false;
+                               release_crq_queue(adapter);
+                               /* If we don't sleep here then we risk an
+                                * unnecessary failover event from the VIOS.
+                                * This is a known VIOS issue caused by a vnic
+                                * device freeing and registering a CRQ too
+                                * quickly.
+                                */
+                               msleep(1500);
+                               /* Avoid any resets, since we are currently
+                                * resetting.
+                                */
+                               spin_lock_irqsave(&adapter->rwi_lock, flags);
+                               flush_reset_queue(adapter);
+                               spin_unlock_irqrestore(&adapter->rwi_lock,
+                                                      flags);
+
+                               rc = init_crq_queue(adapter);
+                               if (rc) {
+                                       netdev_err(netdev, "login recovery: init CRQ failed %d\n",
+                                                  rc);
+                                       return -EIO;
+                               }
+
+                               rc = ibmvnic_reset_init(adapter, false);
+                               if (rc)
+                                       netdev_err(netdev, "login recovery: Reset init failed %d\n",
+                                                  rc);
+                               /* IBMVNIC_CRQ_INIT will return EAGAIN if it
+                                * fails. Since ibmvnic_reset_init() frees the
+                                * IRQs on failure, we won't be able to receive
+                                * new CRQs, so we need to keep trying. probe()
+                                * handles this similarly.
+                                */
+                       } while (rc == -EAGAIN && retry_count++ < retries);
                }
        } while (retry);
 
@@ -1588,12 +1648,22 @@ static int ibmvnic_login(struct net_device *netdev)
 
 static void release_login_buffer(struct ibmvnic_adapter *adapter)
 {
+       if (!adapter->login_buf)
+               return;
+
+       dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
+                        adapter->login_buf_sz, DMA_TO_DEVICE);
        kfree(adapter->login_buf);
        adapter->login_buf = NULL;
 }
 
 static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
 {
+       if (!adapter->login_rsp_buf)
+               return;
+
+       dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
+                        adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
        kfree(adapter->login_rsp_buf);
        adapter->login_rsp_buf = NULL;
 }
@@ -4830,11 +4900,14 @@ static int send_login(struct ibmvnic_adapter *adapter)
        if (rc) {
                adapter->login_pending = false;
                netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
-               goto buf_rsp_map_failed;
+               goto buf_send_failed;
        }
 
        return 0;
 
+buf_send_failed:
+       dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
+                        DMA_FROM_DEVICE);
 buf_rsp_map_failed:
        kfree(login_rsp_buffer);
        adapter->login_rsp_buf = NULL;
@@ -5396,6 +5469,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        int num_tx_pools;
        int num_rx_pools;
        u64 *size_array;
+       u32 rsp_len;
        int i;
 
        /* CHECK: Test/set of login_pending does not need to be atomic
@@ -5407,11 +5481,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        }
        adapter->login_pending = false;
 
-       dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
-                        DMA_TO_DEVICE);
-       dma_unmap_single(dev, adapter->login_rsp_buf_token,
-                        adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
-
        /* If the number of queues requested can't be allocated by the
         * server, the login response will return with code 1. We will need
         * to resend the login buffer with fewer queues requested.
@@ -5447,6 +5516,23 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
                ibmvnic_reset(adapter, VNIC_RESET_FATAL);
                return -EIO;
        }
+
+       rsp_len = be32_to_cpu(login_rsp->len);
+       if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
+           rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
+           rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
+           rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
+           rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
+               /* This can happen if a login request times out and there are
+                * 2 outstanding login requests sent; the LOGIN_RSP CRQ could
+                * have been for the older login request, so we are parsing
+                * the newer response buffer, which may be incomplete.
+                */
+               dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
+               ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+               return -EIO;
+       }
+
        size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
                be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
        /* variable buffer sizes are not supported, so just read the
index 29ad179..a86bfa3 100644 (file)
@@ -2609,7 +2609,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
                        retval = i40e_correct_mac_vlan_filters
                                (vsi, &tmp_add_list, &tmp_del_list,
                                 vlan_filters);
-               else
+               else if (pf->vf)
                        retval = i40e_correct_vf_mac_vlan_filters
                                (vsi, &tmp_add_list, &tmp_del_list,
                                 vlan_filters, pf->vf[vsi->vf_id].trusted);
@@ -2782,7 +2782,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
        }
 
        /* if the VF is not trusted do not do promisc */
-       if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) {
+       if (vsi->type == I40E_VSI_SRIOV && pf->vf &&
+           !pf->vf[vsi->vf_id].trusted) {
                clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
                goto out;
        }
index 9da0c87..f99c1f7 100644 (file)
@@ -210,11 +210,11 @@ read_nvm_exit:
  * @hw: pointer to the HW structure.
  * @module_pointer: module pointer location in words from the NVM beginning
  * @offset: offset in words from module start
- * @words: number of words to write
- * @data: buffer with words to write to the Shadow RAM
+ * @words: number of words to read
+ * @data: buffer for words read from the Shadow RAM
  * @last_command: tells the AdminQ that this is the last command
  *
- * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
+ * Reads a buffer of 16 bit words from the Shadow RAM using the admin command.
  **/
 static int i40e_read_nvm_aq(struct i40e_hw *hw,
                            u8 module_pointer, u32 offset,
@@ -234,18 +234,18 @@ static int i40e_read_nvm_aq(struct i40e_hw *hw,
         */
        if ((offset + words) > hw->nvm.sr_size)
                i40e_debug(hw, I40E_DEBUG_NVM,
-                          "NVM write error: offset %d beyond Shadow RAM limit %d\n",
+                          "NVM read error: offset %d beyond Shadow RAM limit %d\n",
                           (offset + words), hw->nvm.sr_size);
        else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS)
-               /* We can write only up to 4KB (one sector), in one AQ write */
+               /* We can read only up to 4KB (one sector), in one AQ read */
                i40e_debug(hw, I40E_DEBUG_NVM,
-                          "NVM write fail error: tried to write %d words, limit is %d.\n",
+                          "NVM read fail error: tried to read %d words, limit is %d.\n",
                           words, I40E_SR_SECTOR_SIZE_IN_WORDS);
        else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS)
                 != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS))
-               /* A single write cannot spread over two sectors */
+               /* A single read cannot spread over two sectors */
                i40e_debug(hw, I40E_DEBUG_NVM,
-                          "NVM write error: cannot spread over two sectors in a single write offset=%d words=%d\n",
+                          "NVM read error: cannot spread over two sectors in a single read offset=%d words=%d\n",
                           offset, words);
        else
                ret_code = i40e_aq_read_nvm(hw, module_pointer,
index 2f47cfa..a34303a 100644 (file)
@@ -1289,6 +1289,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
                fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
                fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos;
+               fltr->ip_ver = 4;
                break;
        case AH_V4_FLOW:
        case ESP_V4_FLOW:
@@ -1300,6 +1301,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst;
                fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi;
                fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos;
+               fltr->ip_ver = 4;
                break;
        case IPV4_USER_FLOW:
                fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
@@ -1312,6 +1314,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
                fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos;
                fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto;
+               fltr->ip_ver = 4;
                break;
        case TCP_V6_FLOW:
        case UDP_V6_FLOW:
@@ -1330,6 +1333,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc;
                fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
                fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass;
+               fltr->ip_ver = 6;
                break;
        case AH_V6_FLOW:
        case ESP_V6_FLOW:
@@ -1345,6 +1349,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                       sizeof(struct in6_addr));
                fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi;
                fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass;
+               fltr->ip_ver = 6;
                break;
        case IPV6_USER_FLOW:
                memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
@@ -1361,6 +1366,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
                fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass;
                fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto;
+               fltr->ip_ver = 6;
                break;
        case ETHER_FLOW:
                fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto;
@@ -1371,6 +1377,10 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                return -EINVAL;
        }
 
+       err = iavf_validate_fdir_fltr_masks(adapter, fltr);
+       if (err)
+               return err;
+
        if (iavf_fdir_is_dup_fltr(adapter, fltr))
                return -EEXIST;
 
@@ -1401,14 +1411,15 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
        if (fsp->flow_type & FLOW_MAC_EXT)
                return -EINVAL;
 
+       spin_lock_bh(&adapter->fdir_fltr_lock);
        if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) {
+               spin_unlock_bh(&adapter->fdir_fltr_lock);
                dev_err(&adapter->pdev->dev,
                        "Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n",
                        IAVF_MAX_FDIR_FILTERS);
                return -ENOSPC;
        }
 
-       spin_lock_bh(&adapter->fdir_fltr_lock);
        if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) {
                dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n");
                spin_unlock_bh(&adapter->fdir_fltr_lock);
@@ -1781,7 +1792,9 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
        case ETHTOOL_GRXCLSRLCNT:
                if (!FDIR_FLTR_SUPPORT(adapter))
                        break;
+               spin_lock_bh(&adapter->fdir_fltr_lock);
                cmd->rule_cnt = adapter->fdir_active_fltr;
+               spin_unlock_bh(&adapter->fdir_fltr_lock);
                cmd->data = IAVF_MAX_FDIR_FILTERS;
                ret = 0;
                break;
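
These iavf hunks pull the capacity check and the rule-count read under fdir_fltr_lock so the test and the action it guards cannot race with a concurrent writer. A userspace analogue of the check-then-act pattern using a pthread mutex (build with -pthread); all names are hypothetical:

#include <pthread.h>
#include <stdio.h>

#define MAX_FILTERS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int active_filters;

/* Check the limit and reserve a slot under the same lock, so two callers
 * cannot both pass the check and push the count past MAX_FILTERS.
 */
static int add_filter(void)
{
	int ret = 0;

	pthread_mutex_lock(&lock);
	if (active_filters >= MAX_FILTERS)
		ret = -1;
	else
		active_filters++;
	pthread_mutex_unlock(&lock);
	return ret;
}

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100; i++)
		add_filter();
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("active_filters = %d (never above %d)\n",
	       active_filters, MAX_FILTERS);
	return 0;
}
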
index 6146203..03e774b 100644 (file)
@@ -18,6 +18,79 @@ static const struct in6_addr ipv6_addr_full_mask = {
        }
 };
 
+static const struct in6_addr ipv6_addr_zero_mask = {
+       .in6_u = {
+               .u6_addr8 = {
+                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               }
+       }
+};
+
+/**
+ * iavf_validate_fdir_fltr_masks - validate Flow Director filter fields masks
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns 0 if all masks of packet fields are either full or empty. Returns
+ * error on at least one partial mask.
+ */
+int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
+                                 struct iavf_fdir_fltr *fltr)
+{
+       if (fltr->eth_mask.etype && fltr->eth_mask.etype != htons(U16_MAX))
+               goto partial_mask;
+
+       if (fltr->ip_ver == 4) {
+               if (fltr->ip_mask.v4_addrs.src_ip &&
+                   fltr->ip_mask.v4_addrs.src_ip != htonl(U32_MAX))
+                       goto partial_mask;
+
+               if (fltr->ip_mask.v4_addrs.dst_ip &&
+                   fltr->ip_mask.v4_addrs.dst_ip != htonl(U32_MAX))
+                       goto partial_mask;
+
+               if (fltr->ip_mask.tos && fltr->ip_mask.tos != U8_MAX)
+                       goto partial_mask;
+       } else if (fltr->ip_ver == 6) {
+               if (memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_zero_mask,
+                          sizeof(struct in6_addr)) &&
+                   memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask,
+                          sizeof(struct in6_addr)))
+                       goto partial_mask;
+
+               if (memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_zero_mask,
+                          sizeof(struct in6_addr)) &&
+                   memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask,
+                          sizeof(struct in6_addr)))
+                       goto partial_mask;
+
+               if (fltr->ip_mask.tclass && fltr->ip_mask.tclass != U8_MAX)
+                       goto partial_mask;
+       }
+
+       if (fltr->ip_mask.proto && fltr->ip_mask.proto != U8_MAX)
+               goto partial_mask;
+
+       if (fltr->ip_mask.src_port && fltr->ip_mask.src_port != htons(U16_MAX))
+               goto partial_mask;
+
+       if (fltr->ip_mask.dst_port && fltr->ip_mask.dst_port != htons(U16_MAX))
+               goto partial_mask;
+
+       if (fltr->ip_mask.spi && fltr->ip_mask.spi != htonl(U32_MAX))
+               goto partial_mask;
+
+       if (fltr->ip_mask.l4_header &&
+           fltr->ip_mask.l4_header != htonl(U32_MAX))
+               goto partial_mask;
+
+       return 0;
+
+partial_mask:
+       dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, partial masks are not supported\n");
+       return -EOPNOTSUPP;
+}
+
 /**
  * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload
  * @fltr: Flow Director filter data structure
@@ -263,8 +336,6 @@ iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr,
                VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
        }
 
-       fltr->ip_ver = 4;
-
        return 0;
 }
 
@@ -309,8 +380,6 @@ iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr,
                VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
        }
 
-       fltr->ip_ver = 6;
-
        return 0;
 }
 
@@ -722,7 +791,9 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f
 bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
 {
        struct iavf_fdir_fltr *tmp;
+       bool ret = false;
 
+       spin_lock_bh(&adapter->fdir_fltr_lock);
        list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
                if (tmp->flow_type != fltr->flow_type)
                        continue;
@@ -732,11 +803,14 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
                    !memcmp(&tmp->ip_data, &fltr->ip_data,
                            sizeof(fltr->ip_data)) &&
                    !memcmp(&tmp->ext_data, &fltr->ext_data,
-                           sizeof(fltr->ext_data)))
-                       return true;
+                           sizeof(fltr->ext_data))) {
+                       ret = true;
+                       break;
+               }
        }
+       spin_unlock_bh(&adapter->fdir_fltr_lock);
 
-       return false;
+       return ret;
 }
 
 /**
index 33c55c3..9eb9f73 100644 (file)
@@ -110,6 +110,8 @@ struct iavf_fdir_fltr {
        struct virtchnl_fdir_add vc_add_msg;
 };
 
+int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
+                                 struct iavf_fdir_fltr *fltr);
 int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
 void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
 bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
index b678bdf..074bf94 100644 (file)
@@ -435,7 +435,8 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
        /* Receive Packet Data Buffer Size.
         * The Packet Data Buffer Size is defined in 128 byte units.
         */
-       rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+       rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
+                                    BIT_ULL(ICE_RLAN_CTX_DBUF_S));
 
        /* use 32 byte descriptors */
        rlan_ctx.dsize = 1;
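
The ice change swaps a plain right shift for DIV_ROUND_UP() when converting rx_buf_len into 128-byte units: the shift truncates any length that is not a multiple of 128 and under-sizes the data buffer. A tiny sketch of the difference, using a local macro equivalent to the kernel's DIV_ROUND_UP():

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int rx_buf_len = 3000;	/* not a multiple of 128 */

	/* Truncating conversion: 3000 >> 7 = 23 units = 2944 bytes, too small. */
	printf("shift:        %u units\n", rx_buf_len >> 7);

	/* Round-up conversion: 24 units = 3072 bytes, covers the whole buffer. */
	printf("DIV_ROUND_UP: %u units\n", DIV_ROUND_UP(rx_buf_len, 128u));
	return 0;
}
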
index ad0a007..8f232c4 100644 (file)
@@ -538,6 +538,12 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
                break;
        case DEVLINK_ESWITCH_MODE_SWITCHDEV:
        {
+               if (ice_is_adq_active(pf)) {
+                       dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+                       NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+                       return -EOPNOTSUPP;
+               }
+
                dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev",
                         pf->hw.pf_id);
                NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev");
index cf92c39..b40dfe6 100644 (file)
@@ -8823,6 +8823,11 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
                                                  ice_setup_tc_block_cb,
                                                  np, np, true);
        case TC_SETUP_QDISC_MQPRIO:
+               if (ice_is_eswitch_mode_switchdev(pf)) {
+                       netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n");
+                       return -EOPNOTSUPP;
+               }
+
                if (pf->adev) {
                        mutex_lock(&pf->adev_mutex);
                        device_lock(&pf->adev->dev);
index 1f66914..31314e7 100644 (file)
@@ -1131,7 +1131,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
        if (!vf)
                return -EINVAL;
 
-       ret = ice_check_vf_ready_for_reset(vf);
+       ret = ice_check_vf_ready_for_cfg(vf);
        if (ret)
                goto out_put_vf;
 
@@ -1246,7 +1246,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
                goto out_put_vf;
        }
 
-       ret = ice_check_vf_ready_for_reset(vf);
+       ret = ice_check_vf_ready_for_cfg(vf);
        if (ret)
                goto out_put_vf;
 
@@ -1300,7 +1300,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
                return -EOPNOTSUPP;
        }
 
-       ret = ice_check_vf_ready_for_reset(vf);
+       ret = ice_check_vf_ready_for_cfg(vf);
        if (ret)
                goto out_put_vf;
 
@@ -1613,7 +1613,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
        if (!vf)
                return -EINVAL;
 
-       ret = ice_check_vf_ready_for_reset(vf);
+       ret = ice_check_vf_ready_for_cfg(vf);
        if (ret)
                goto out_put_vf;
 
index b26ce44..ea3310b 100644 (file)
@@ -186,25 +186,6 @@ int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
 }
 
 /**
- * ice_check_vf_ready_for_reset - check if VF is ready to be reset
- * @vf: VF to check if it's ready to be reset
- *
- * The purpose of this function is to ensure that the VF is not in reset,
- * disabled, and is both initialized and active, thus enabling us to safely
- * initialize another reset.
- */
-int ice_check_vf_ready_for_reset(struct ice_vf *vf)
-{
-       int ret;
-
-       ret = ice_check_vf_ready_for_cfg(vf);
-       if (!ret && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
-               ret = -EAGAIN;
-
-       return ret;
-}
-
-/**
  * ice_trigger_vf_reset - Reset a VF on HW
  * @vf: pointer to the VF structure
  * @is_vflr: true if VFLR was issued, false if not
@@ -631,11 +612,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
                return 0;
        }
 
+       if (flags & ICE_VF_RESET_LOCK)
+               mutex_lock(&vf->cfg_lock);
+       else
+               lockdep_assert_held(&vf->cfg_lock);
+
        if (ice_is_vf_disabled(vf)) {
                vsi = ice_get_vf_vsi(vf);
                if (!vsi) {
                        dev_dbg(dev, "VF is already removed\n");
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto out_unlock;
                }
                ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
 
@@ -644,14 +631,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
 
                dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
                        vf->vf_id);
-               return 0;
+               goto out_unlock;
        }
 
-       if (flags & ICE_VF_RESET_LOCK)
-               mutex_lock(&vf->cfg_lock);
-       else
-               lockdep_assert_held(&vf->cfg_lock);
-
        /* Set VF disable bit state here, before triggering reset */
        set_bit(ICE_VF_STATE_DIS, vf->vf_states);
        ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false);
index 67172fd..48fea6f 100644 (file)
@@ -215,7 +215,6 @@ u16 ice_get_num_vfs(struct ice_pf *pf);
 struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf);
 bool ice_is_vf_disabled(struct ice_vf *vf);
 int ice_check_vf_ready_for_cfg(struct ice_vf *vf);
-int ice_check_vf_ready_for_reset(struct ice_vf *vf);
 void ice_set_vf_state_dis(struct ice_vf *vf);
 bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf);
 void
index efbc296..dcf628b 100644 (file)
@@ -3947,7 +3947,6 @@ error_handler:
                ice_vc_notify_vf_link_state(vf);
                break;
        case VIRTCHNL_OP_RESET_VF:
-               clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
                ops->reset_vf(vf);
                break;
        case VIRTCHNL_OP_ADD_ETH_ADDR:
index 405886e..319c544 100644 (file)
@@ -1385,18 +1385,6 @@ void igb_ptp_init(struct igb_adapter *adapter)
                return;
        }
 
-       spin_lock_init(&adapter->tmreg_lock);
-       INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
-
-       if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
-               INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
-                                 igb_ptp_overflow_check);
-
-       adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
-       adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
-
-       igb_ptp_reset(adapter);
-
        adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
                                                &adapter->pdev->dev);
        if (IS_ERR(adapter->ptp_clock)) {
@@ -1406,6 +1394,18 @@ void igb_ptp_init(struct igb_adapter *adapter)
                dev_info(&adapter->pdev->dev, "added PHC on %s\n",
                         adapter->netdev->name);
                adapter->ptp_flags |= IGB_PTP_ENABLED;
+
+               spin_lock_init(&adapter->tmreg_lock);
+               INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
+
+               if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+                       INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
+                                         igb_ptp_overflow_check);
+
+               adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+               adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+               igb_ptp_reset(adapter);
        }
 }
 
index 9db384f..38901d2 100644 (file)
@@ -195,6 +195,10 @@ struct igc_adapter {
        u32 qbv_config_change_errors;
        bool qbv_transition;
        unsigned int qbv_count;
+       /* Access to oper_gate_closed, admin_gate_closed and qbv_transition
+        * is protected by the qbv_tx_lock.
+        */
+       spinlock_t qbv_tx_lock;
 
        /* OS defined structs */
        struct pci_dev *pdev;
index 44a5070..2f780cc 100644 (file)
 #define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */
 #define IGC_PTM_CTRL_EN                BIT(30) /* Enable PTM */
 #define IGC_PTM_CTRL_TRIG      BIT(31) /* PTM Cycle trigger */
-#define IGC_PTM_CTRL_SHRT_CYC(usec)    (((usec) & 0x2f) << 2)
+#define IGC_PTM_CTRL_SHRT_CYC(usec)    (((usec) & 0x3f) << 2)
 #define IGC_PTM_CTRL_PTM_TO(usec)      (((usec) & 0xff) << 8)
 
 #define IGC_PTM_SHORT_CYC_DEFAULT      10  /* Default Short/interrupted cycle interval */
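
The mask fix above matters because 0x2f is not a contiguous mask: bit 4 is missing, so any short-cycle value with that bit set was silently clipped, while 0x3f covers the full 6-bit field. A quick stand-alone demonstration:

#include <stdio.h>

/* Old, buggy mask: 0x2f = 0b101111, bit 4 is missing from the mask. */
#define SHRT_CYC_OLD(usec)	(((usec) & 0x2f) << 2)
/* Fixed mask: 0x3f = 0b111111, the full contiguous 6-bit field. */
#define SHRT_CYC_NEW(usec)	(((usec) & 0x3f) << 2)

int main(void)
{
	unsigned int usec = 31;		/* 0b011111: bit 4 is set */

	printf("old mask: 0x%02x\n", SHRT_CYC_OLD(usec));	/* 0x3c: bit 4 lost */
	printf("new mask: 0x%02x\n", SHRT_CYC_NEW(usec));	/* 0x7c: value intact */
	return 0;
}
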
index bdeb367..6f557e8 100644 (file)
@@ -4801,6 +4801,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
        adapter->nfc_rule_count = 0;
 
        spin_lock_init(&adapter->stats64_lock);
+       spin_lock_init(&adapter->qbv_tx_lock);
        /* Assume MSI-X interrupts, will be checked during IRQ allocation */
        adapter->flags |= IGC_FLAG_HAS_MSIX;
 
@@ -6119,15 +6120,15 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
        return igc_tsn_offload_apply(adapter);
 }
 
-static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+static int igc_qbv_clear_schedule(struct igc_adapter *adapter)
 {
+       unsigned long flags;
        int i;
 
        adapter->base_time = 0;
        adapter->cycle_time = NSEC_PER_SEC;
        adapter->taprio_offload_enable = false;
        adapter->qbv_config_change_errors = 0;
-       adapter->qbv_transition = false;
        adapter->qbv_count = 0;
 
        for (i = 0; i < adapter->num_tx_queues; i++) {
@@ -6136,10 +6137,28 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
                ring->start_time = 0;
                ring->end_time = NSEC_PER_SEC;
                ring->max_sdu = 0;
+       }
+
+       spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
+       adapter->qbv_transition = false;
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *ring = adapter->tx_ring[i];
+
                ring->oper_gate_closed = false;
                ring->admin_gate_closed = false;
        }
 
+       spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
+       return 0;
+}
+
+static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+{
+       igc_qbv_clear_schedule(adapter);
+
        return 0;
 }
 
@@ -6150,6 +6169,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
        struct igc_hw *hw = &adapter->hw;
        u32 start_time = 0, end_time = 0;
        struct timespec64 now;
+       unsigned long flags;
        size_t n;
        int i;
 
@@ -6217,6 +6237,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
                start_time += e->interval;
        }
 
+       spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
        /* Check whether a queue gets configured.
         * If not, set the start and end time to be end time.
         */
@@ -6241,6 +6263,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
                }
        }
 
+       spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igc_ring *ring = adapter->tx_ring[i];
                struct net_device *dev = adapter->netdev;
@@ -6619,8 +6643,11 @@ static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
 {
        struct igc_adapter *adapter = container_of(timer, struct igc_adapter,
                                                   hrtimer);
+       unsigned long flags;
        unsigned int i;
 
+       spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
        adapter->qbv_transition = true;
        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igc_ring *tx_ring = adapter->tx_ring[i];
@@ -6633,6 +6660,9 @@ static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
                }
        }
        adapter->qbv_transition = false;
+
+       spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
        return HRTIMER_NORESTART;
 }
 
index 1cc6af2..565320e 100644 (file)
@@ -55,7 +55,7 @@ static int octep_send_mbox_req(struct octep_device *oct,
        list_add_tail(&d->list, &oct->ctrl_req_wait_list);
        ret = wait_event_interruptible_timeout(oct->ctrl_req_wait_q,
                                               (d->done != 0),
-                                              jiffies + msecs_to_jiffies(500));
+                                              msecs_to_jiffies(500));
        list_del(&d->list);
        if (ret == 0 || ret == 1)
                return -EAGAIN;
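
wait_event_interruptible_timeout() expects a timeout relative to now, so passing jiffies + msecs_to_jiffies(500) hands it a huge value and the intended 500 ms wait can stretch enormously. Userspace APIs split the same way: poll() takes a relative timeout in milliseconds, while pthread_cond_timedwait() takes an absolute deadline. A short sketch of building each form correctly:

#include <poll.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec deadline;

	/* Relative timeout: poll() just wants "how long from now", in ms. */
	int rc = poll(NULL, 0, 500);	/* sleeps ~500 ms, returns 0 on timeout */
	printf("poll returned %d\n", rc);

	/* Absolute deadline: interfaces like pthread_cond_timedwait() want
	 * "now + delta" instead; mixing the two conventions up gives either
	 * an immediate timeout or a near-infinite one.
	 */
	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1;		/* one second from now */
	printf("deadline at %lld s\n", (long long)deadline.tv_sec);
	return 0;
}
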
index 43eb6e8..4424de2 100644 (file)
@@ -1038,6 +1038,10 @@ static void octep_device_cleanup(struct octep_device *oct)
 {
        int i;
 
+       oct->poll_non_ioq_intr = false;
+       cancel_delayed_work_sync(&oct->intr_poll_task);
+       cancel_work_sync(&oct->ctrl_mbox_task);
+
        dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n");
 
        for (i = 0; i < OCTEP_MAX_VF; i++) {
@@ -1200,14 +1204,11 @@ static void octep_remove(struct pci_dev *pdev)
        if (!oct)
                return;
 
-       cancel_work_sync(&oct->tx_timeout_task);
-       cancel_work_sync(&oct->ctrl_mbox_task);
        netdev = oct->netdev;
        if (netdev->reg_state == NETREG_REGISTERED)
                unregister_netdev(netdev);
 
-       oct->poll_non_ioq_intr = false;
-       cancel_delayed_work_sync(&oct->intr_poll_task);
+       cancel_work_sync(&oct->tx_timeout_task);
        octep_device_cleanup(oct);
        pci_release_mem_regions(pdev);
        free_netdev(netdev);
index 04b0e88..c2f6867 100644 (file)
@@ -4270,9 +4270,10 @@ rx_frscfg:
        if (link < 0)
                return NIX_AF_ERR_RX_LINK_INVALID;
 
-       nix_find_link_frs(rvu, req, pcifunc);
 
 linkcfg:
+       nix_find_link_frs(rvu, req, pcifunc);
+
        cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link));
        cfg = (cfg & ~(0xFFFFULL << 16)) | ((u64)req->maxlen << 16);
        if (req->update_minlen)
index a9a1028..de31717 100644 (file)
@@ -166,11 +166,11 @@ prestera_util_neigh2nc_key(struct prestera_switch *sw, struct neighbour *n,
 
 static bool __prestera_fi_is_direct(struct fib_info *fi)
 {
-       struct fib_nh *fib_nh;
+       struct fib_nh_common *fib_nhc;
 
        if (fib_info_num_path(fi) == 1) {
-               fib_nh = fib_info_nh(fi, 0);
-               if (fib_nh->fib_nh_gw_family == AF_UNSPEC)
+               fib_nhc = fib_info_nhc(fi, 0);
+               if (fib_nhc->nhc_gw_family == AF_UNSPEC)
                        return true;
        }
 
@@ -261,7 +261,7 @@ static bool
 __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
                                       struct net_device *dev)
 {
-       struct fib_nh *fib_nh;
+       struct fib_nh_common *fib_nhc;
        struct fib_result res;
        bool reachable;
 
@@ -269,8 +269,8 @@ __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
 
        if (!prestera_util_kern_get_route(&res, tb_id, addr))
                if (prestera_fi_is_direct(res.fi)) {
-                       fib_nh = fib_info_nh(res.fi, 0);
-                       if (dev == fib_nh->fib_nh_dev)
+                       fib_nhc = fib_info_nhc(res.fi, 0);
+                       if (dev == fib_nhc->nhc_dev)
                                reachable = true;
                }
 
@@ -324,7 +324,7 @@ prestera_kern_fib_info_nhc(struct fib_notifier_info *info, int n)
        if (info->family == AF_INET) {
                fen4_info = container_of(info, struct fib_entry_notifier_info,
                                         info);
-               return &fib_info_nh(fen4_info->fi, n)->nh_common;
+               return fib_info_nhc(fen4_info->fi, n);
        } else if (info->family == AF_INET6) {
                fen6_info = container_of(info, struct fib6_entry_notifier_info,
                                         info);
index 985cff9..3b651ef 100644 (file)
@@ -221,9 +221,13 @@ void mtk_wed_fe_reset(void)
 
        for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
                struct mtk_wed_hw *hw = hw_list[i];
-               struct mtk_wed_device *dev = hw->wed_dev;
+               struct mtk_wed_device *dev;
                int err;
 
+               if (!hw)
+                       break;
+
+               dev = hw->wed_dev;
                if (!dev || !dev->wlan.reset)
                        continue;
 
@@ -244,8 +248,12 @@ void mtk_wed_fe_reset_complete(void)
 
        for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
                struct mtk_wed_hw *hw = hw_list[i];
-               struct mtk_wed_device *dev = hw->wed_dev;
+               struct mtk_wed_device *dev;
+
+               if (!hw)
+                       break;
 
+               dev = hw->wed_dev;
                if (!dev || !dev->wlan.reset_complete)
                        continue;
 
index b012833..e869c65 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
 
 #include "reporter_vnic.h"
+#include "en_stats.h"
 #include "devlink.h"
 
 #define VNIC_ENV_GET64(vnic_env_stats, c) \
@@ -36,55 +37,72 @@ int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
        if (err)
                return err;
 
-       err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues",
-                                       VNIC_ENV_GET64(&vnic, total_error_queues));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow",
-                                       VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun",
-                                       VNIC_ENV_GET64(&vnic, comp_eq_overrun));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun",
-                                       VNIC_ENV_GET64(&vnic, async_eq_overrun));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun",
-                                       VNIC_ENV_GET64(&vnic, cq_overrun));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command",
-                                       VNIC_ENV_GET64(&vnic, invalid_command));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command",
-                                       VNIC_ENV_GET64(&vnic, quota_exceeded_command));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
-                                       VNIC_ENV_GET64(&vnic, nic_receive_steering_discard));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
-                                       VNIC_ENV_GET64(&vnic, generated_pkt_steering_fail));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
-                                       VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
-       if (err)
-               return err;
+       if (MLX5_CAP_GEN(dev, vnic_env_queue_counters)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "total_error_queues",
+                                               VNIC_ENV_GET(&vnic, total_error_queues));
+               if (err)
+                       return err;
+
+               err = devlink_fmsg_u32_pair_put(fmsg, "send_queue_priority_update_flow",
+                                               VNIC_ENV_GET(&vnic,
+                                                            send_queue_priority_update_flow));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, eq_overrun_count)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "comp_eq_overrun",
+                                               VNIC_ENV_GET(&vnic, comp_eq_overrun));
+               if (err)
+                       return err;
+
+               err = devlink_fmsg_u32_pair_put(fmsg, "async_eq_overrun",
+                                               VNIC_ENV_GET(&vnic, async_eq_overrun));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, vnic_env_cq_overrun)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "cq_overrun",
+                                               VNIC_ENV_GET(&vnic, cq_overrun));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, invalid_command_count)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "invalid_command",
+                                               VNIC_ENV_GET(&vnic, invalid_command));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, quota_exceeded_count)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "quota_exceeded_command",
+                                               VNIC_ENV_GET(&vnic, quota_exceeded_command));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, nic_receive_steering_discard)) {
+               err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
+                                               VNIC_ENV_GET64(&vnic,
+                                                              nic_receive_steering_discard));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, vnic_env_cnt_steering_fail)) {
+               err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
+                                               VNIC_ENV_GET64(&vnic,
+                                                              generated_pkt_steering_fail));
+               if (err)
+                       return err;
+
+               err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
+                                               VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
+               if (err)
+                       return err;
+       }
 
        err = devlink_fmsg_obj_nest_end(fmsg);
        if (err)
index 0c88cf4..1730f6a 100644 (file)
@@ -1461,10 +1461,12 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
                attr = mlx5e_tc_get_encap_attr(flow);
                esw_attr = attr->esw_attr;
 
-               if (flow_flag_test(flow, SLOW))
+               if (flow_flag_test(flow, SLOW)) {
                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
-               else
+               } else {
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+                       mlx5e_tc_unoffload_flow_post_acts(flow);
+               }
 
                mlx5e_tc_detach_mod_hdr(priv, flow, attr);
                attr->modify_hdr = NULL;
index 9e8e618..ecfe93a 100644 (file)
@@ -84,6 +84,8 @@ enum mlx5e_xdp_xmit_mode {
  * MLX5E_XDP_XMIT_MODE_XSK:
  *    none.
  */
+#define MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO 4
+
 union mlx5e_xdp_info {
        enum mlx5e_xdp_xmit_mode mode;
        union {
index 1c82011..f7b4941 100644 (file)
@@ -1298,11 +1298,13 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
 {
        struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
        int wq_sz        = mlx5_wq_cyc_get_size(&sq->wq);
-       int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of
-                                                          * entries of all xmit_modes.
-                                                          */
+       int entries;
        size_t size;
 
+       /* upper bound for maximum num of entries of all xmit_modes. */
+       entries = roundup_pow_of_two(wq_sz * MLX5_SEND_WQEBB_NUM_DS *
+                                    MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO);
+
        size = array_size(sizeof(*xdpi_fifo->xi), entries);
        xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
        if (!xdpi_fifo->xi)
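
Sizing the FIFO with roundup_pow_of_two() restores a true upper bound when wq_sz * MLX5_SEND_WQEBB_NUM_DS * ratio is not already a power of two, and a power-of-two size also lets a ring wrap its counters with a cheap mask rather than a modulo. A stand-alone sketch of the idea, with a local helper standing in for the kernel's roundup_pow_of_two():

#include <stdint.h>
#include <stdio.h>

/* Round v up to the next power of two (v > 0, result fits in 32 bits). */
static uint32_t roundup_pow2(uint32_t v)
{
	v--;
	v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16;
	return v + 1;
}

int main(void)
{
	uint32_t wanted = 24 * 4 * 4;		/* e.g. wq_sz * DS per WQEBB * ratio */
	uint32_t entries = roundup_pow2(wanted);
	uint32_t mask = entries - 1;
	uint32_t pc = 0;			/* free-running producer counter */

	printf("wanted=%u entries=%u\n", wanted, entries);
	for (int i = 0; i < 5; i++, pc++)
		printf("slot for pc=%u is %u\n", pc, pc & mask);
	return 0;
}
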
@@ -5266,6 +5268,7 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
 static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
                          struct net_device *netdev)
 {
+       const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED;
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5e_flow_steering *fs;
        int err;
@@ -5294,9 +5297,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
                mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 
        mlx5e_health_create_reporters(priv);
+
+       /* If netdev is already registered (e.g. move from uplink to nic profile),
+        * RTNL lock must be held before triggering netdev notifiers.
+        */
+       if (take_rtnl)
+               rtnl_lock();
+
        /* update XDP supported features */
        mlx5e_set_xdp_feature(netdev);
 
+       if (take_rtnl)
+               rtnl_unlock();
+
        return 0;
 }
 
index 9237763..31708d5 100644 (file)
@@ -1943,9 +1943,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_flow_attr *attr = flow->attr;
-       struct mlx5_esw_flow_attr *esw_attr;
 
-       esw_attr = attr->esw_attr;
        mlx5e_put_flow_tunnel_id(flow);
 
        remove_unready_flow(flow);
@@ -1966,12 +1964,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 
        mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
 
-       if (esw_attr->int_port)
-               mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
-
-       if (esw_attr->dest_int_port)
-               mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
-
        if (flow_flag_test(flow, L3_TO_L2_DECAP))
                mlx5e_detach_decap(priv, flow);
 
@@ -4268,6 +4260,7 @@ static void
 mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
 {
        struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
+       struct mlx5_esw_flow_attr *esw_attr;
 
        if (!attr)
                return;
@@ -4285,6 +4278,18 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a
                mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
        }
 
+       if (mlx5e_is_eswitch_flow(flow)) {
+               esw_attr = attr->esw_attr;
+
+               if (esw_attr->int_port)
+                       mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+                                             esw_attr->int_port);
+
+               if (esw_attr->dest_int_port)
+                       mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+                                             esw_attr->dest_int_port);
+       }
+
        mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
 
        free_branch_attr(flow, attr->branch_true);
index af779c7..fdf2be5 100644 (file)
@@ -60,7 +60,7 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16
        }  else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) {
                memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
                dl_port->attrs.switch_id.id_len = ppid.id_len;
-               devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum,
+               devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum,
                                              vport_num - 1, false);
        }
        return dl_port;
index aab7059..244cfd4 100644 (file)
@@ -245,12 +245,20 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
            mlx5_lag_is_shared_fdb(dev) &&
            mlx5_lag_is_master(dev)) {
                struct mlx5_core_dev *peer_dev;
-               int i;
+               int i, j;
 
                mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
                        err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect,
                                                          (!disconnect) ? ft->id : 0);
                        if (err && !disconnect) {
+                               mlx5_lag_for_each_peer_mdev(dev, peer_dev, j) {
+                                       if (j < i)
+                                               mlx5_cmd_set_slave_root_fdb(dev, peer_dev, 1,
+                                                                           ns->root_ft->id);
+                                       else
+                                               break;
+                               }
+
                                MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
                                MLX5_SET(set_flow_table_root_in, in, table_id,
                                         ns->root_ft->id);
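
If pushing the new root FDB to one peer fails, the added loop restores the previous root on every peer that was already updated (j < i), so the group is never left half-configured. A generic userspace sketch of that unwind-on-partial-failure pattern, with invented apply/restore helpers:

#include <stdio.h>

#define NPEERS 4

/* Pretend to push new_cfg to peer i; fail on peer 2 to exercise the unwind. */
static int apply_cfg(int peer, int new_cfg)
{
	if (peer == 2)
		return -1;
	printf("peer %d: applied cfg %d\n", peer, new_cfg);
	return 0;
}

static void restore_cfg(int peer, int old_cfg)
{
	printf("peer %d: restored cfg %d\n", peer, old_cfg);
}

static int update_all_peers(int old_cfg, int new_cfg)
{
	int i, j;

	for (i = 0; i < NPEERS; i++) {
		if (apply_cfg(i, new_cfg) == 0)
			continue;
		/* Unwind only the peers that already took the new config. */
		for (j = 0; j < i; j++)
			restore_cfg(j, old_cfg);
		return -1;
	}
	return 0;
}

int main(void)
{
	return update_all_peers(10, 20) ? 1 : 0;
}
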
index d3a3fe4..7d9bbb4 100644 (file)
@@ -574,7 +574,7 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
        for (i = 0; i < ldev->ports; i++) {
                for (j = 0; j < ldev->buckets; j++) {
                        idx = i * ldev->buckets + j;
-                       if (ldev->v2p_map[i] == ports[i])
+                       if (ldev->v2p_map[idx] == ports[idx])
                                continue;
 
                        dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
index 973babf..377372f 100644 (file)
@@ -227,10 +227,15 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
        clock = container_of(timer, struct mlx5_clock, timer);
        mdev = container_of(clock, struct mlx5_core_dev, clock);
 
+       if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+               goto out;
+
        write_seqlock_irqsave(&clock->lock, flags);
        timecounter_read(&timer->tc);
        mlx5_update_clock_info_page(mdev);
        write_sequnlock_irqrestore(&clock->lock, flags);
+
+out:
        schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
 }
 
index f42abc2..72ae560 100644 (file)
@@ -1989,7 +1989,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
        mlx5_enter_error_state(dev, false);
        mlx5_error_sw_reset(dev);
-       mlx5_unload_one(dev, true);
+       mlx5_unload_one(dev, false);
        mlx5_drain_health_wq(dev);
        mlx5_pci_disable_device(dev);
 
index c4be257..682d3dc 100644 (file)
@@ -361,7 +361,7 @@ static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16
 
 static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func)
 {
-       return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev)
+       return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + 1
                          : vport;
 }
 
index 4e42a3b..a2fc937 100644 (file)
@@ -285,8 +285,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
                host_total_vfs = MLX5_GET(query_esw_functions_out, out,
                                          host_params_context.host_total_vfs);
                kvfree(out);
-               if (host_total_vfs)
-                       return host_total_vfs;
+               return host_total_vfs;
        }
 
 done:
index d6947fe..8ca534e 100644 (file)
@@ -82,7 +82,7 @@ dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr,
        u32 chunk_size;
        u32 index;
 
-       chunk_size = ilog2(num_of_actions);
+       chunk_size = ilog2(roundup_pow_of_two(num_of_actions));
        /* HW modify action index granularity is at least 64B */
        chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
 
index f0b2963..973de2a 100644 (file)
@@ -32,8 +32,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
        MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
        MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
        MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
-       MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 3),
-       MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 20, 8),
+       MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 4),
+       MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 21, 8),
        MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4),
        MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4),
        MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4),
index c968309..51eea1f 100644 (file)
@@ -517,11 +517,15 @@ static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci,
                                    struct sk_buff *skb,
                                    enum mlxsw_pci_cqe_v cqe_v, char *cqe)
 {
+       u8 ts_type;
+
        if (cqe_v != MLXSW_PCI_CQE_V2)
                return;
 
-       if (mlxsw_pci_cqe2_time_stamp_type_get(cqe) !=
-           MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC)
+       ts_type = mlxsw_pci_cqe2_time_stamp_type_get(cqe);
+
+       if (ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC &&
+           ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC)
                return;
 
        mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe);
index 8165bf3..17160e8 100644 (file)
@@ -97,14 +97,6 @@ MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1);
  */
 MLXSW_ITEM32_LP(reg, sspr, 0x00, 16, 0x00, 12);
 
-/* reg_sspr_sub_port
- * Virtual port within the physical port.
- * Should be set to 0 when virtual ports are not enabled on the port.
- *
- * Access: RW
- */
-MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8);
-
 /* reg_sspr_system_port
  * Unique identifier within the stacking domain that represents all the ports
  * that are available in the system (external ports).
@@ -120,7 +112,6 @@ static inline void mlxsw_reg_sspr_pack(char *payload, u16 local_port)
        MLXSW_REG_ZERO(sspr, payload);
        mlxsw_reg_sspr_m_set(payload, 1);
        mlxsw_reg_sspr_local_port_set(payload, local_port);
-       mlxsw_reg_sspr_sub_port_set(payload, 0);
        mlxsw_reg_sspr_system_port_set(payload, local_port);
 }
 
index e4f4cde..b1178b7 100644 (file)
@@ -193,7 +193,7 @@ mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule,
                                       key->vrid, GENMASK(7, 0));
        mlxsw_sp_acl_rulei_keymask_u32(rulei,
                                       MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
-                                      key->vrid >> 8, GENMASK(2, 0));
+                                      key->vrid >> 8, GENMASK(3, 0));
        switch (key->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key);
index 4dea39f..ae2d6f1 100644 (file)
@@ -171,7 +171,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = {
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = {
        MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 24, 8),
-       MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x00, 0, 3),
+       MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x00, 0, 3, 0, true),
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = {
@@ -321,7 +321,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = {
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4b[] = {
        MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 13, 8),
-       MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x04, 21, 4, 0, true),
+       MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x04, 21, 4),
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2b[] = {
index a499e46..c2ad092 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/ethtool.h>
 #include <linux/filter.h>
 #include <linux/mm.h>
+#include <linux/pci.h>
 
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
@@ -2345,9 +2346,12 @@ int mana_attach(struct net_device *ndev)
 static int mana_dealloc_queues(struct net_device *ndev)
 {
        struct mana_port_context *apc = netdev_priv(ndev);
+       unsigned long timeout = jiffies + 120 * HZ;
        struct gdma_dev *gd = apc->ac->gdma_dev;
        struct mana_txq *txq;
+       struct sk_buff *skb;
        int i, err;
+       u32 tsleep;
 
        if (apc->port_is_up)
                return -EINVAL;
@@ -2363,15 +2367,40 @@ static int mana_dealloc_queues(struct net_device *ndev)
         * to false, but it doesn't matter since mana_start_xmit() drops any
         * new packets due to apc->port_is_up being false.
         *
-        * Drain all the in-flight TX packets
+        * Drain all the in-flight TX packets.
+        * A timeout of 120 seconds for all the queues is used.
+        * A single 120 second timeout is applied across all the queues;
+        * it breaks the while loop if the hardware stops responding.
+        * The value was chosen with the maximum number of queues in mind.
+
        for (i = 0; i < apc->num_queues; i++) {
                txq = &apc->tx_qp[i].txq;
-
-               while (atomic_read(&txq->pending_sends) > 0)
-                       usleep_range(1000, 2000);
+               tsleep = 1000;
+               while (atomic_read(&txq->pending_sends) > 0 &&
+                      time_before(jiffies, timeout)) {
+                       usleep_range(tsleep, tsleep + 1000);
+                       tsleep <<= 1;
+               }
+               if (atomic_read(&txq->pending_sends)) {
+                       err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
+                       if (err) {
+                               netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
+                                          err, atomic_read(&txq->pending_sends),
+                                          txq->gdma_txq_id);
+                       }
+                       break;
+               }
        }
 
+       for (i = 0; i < apc->num_queues; i++) {
+               txq = &apc->tx_qp[i].txq;
+               while ((skb = skb_dequeue(&txq->pending_skbs))) {
+                       mana_unmap_skb(skb, apc);
+                       dev_kfree_skb_any(skb);
+               }
+               atomic_set(&txq->pending_sends, 0);
+       }
        /* We're 100% sure the queues can no longer be woken up, because
         * we're sure now mana_poll_tx_cq() can't be running.
         */
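
Instead of polling pending_sends every 1-2 ms forever, the drain now doubles its sleep each round and gives up after a 120 second budget, falling back to an FLR and freeing the queued skbs. A compact userspace sketch of the backoff-with-deadline wait; the pending counter below stands in for txq->pending_sends and is decremented artificially so the example terminates:

#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Wait for *pending to reach zero, doubling the sleep each round,
 * but never past the absolute deadline.
 */
static int drain_with_backoff(volatile int *pending, time_t deadline)
{
	useconds_t tsleep = 1000;	/* start at 1 ms, like the driver */

	while (*pending > 0 && time(NULL) < deadline) {
		usleep(tsleep);
		if (tsleep < 500000)	/* cap growth at 0.5 s per step */
			tsleep <<= 1;
		(*pending)--;		/* stand-in for hardware completions */
	}
	return *pending ? -1 : 0;
}

int main(void)
{
	volatile int pending = 5;
	time_t deadline = time(NULL) + 120;	/* 120 s budget, as in the patch */

	printf("drained: %s\n",
	       drain_with_backoff(&pending, deadline) ? "timed out" : "ok");
	return 0;
}
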
index 612b001..432fb93 100644 (file)
@@ -1817,6 +1817,7 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
 static void ionic_tx_timeout_work(struct work_struct *ws)
 {
        struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work);
+       int err;
 
        if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                return;
@@ -1829,8 +1830,11 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
 
        mutex_lock(&lif->queue_lock);
        ionic_stop_queues_reconfig(lif);
-       ionic_start_queues_reconfig(lif);
+       err = ionic_start_queues_reconfig(lif);
        mutex_unlock(&lif->queue_lock);
+
+       if (err)
+               dev_err(lif->ionic->dev, "%s: Restarting queues failed\n", __func__);
 }
 
 static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
@@ -2800,17 +2804,22 @@ static int ionic_cmb_reconfig(struct ionic_lif *lif,
                        if (err) {
                                dev_err(lif->ionic->dev,
                                        "CMB restore failed: %d\n", err);
-                               goto errout;
+                               goto err_out;
                        }
                }
 
-               ionic_start_queues_reconfig(lif);
-       } else {
-               /* This was detached in ionic_stop_queues_reconfig() */
-               netif_device_attach(lif->netdev);
+               err = ionic_start_queues_reconfig(lif);
+               if (err) {
+                       dev_err(lif->ionic->dev,
+                               "CMB reconfig failed: %d\n", err);
+                       goto err_out;
+               }
        }
 
-errout:
+err_out:
+       /* This was detached in ionic_stop_queues_reconfig() */
+       netif_device_attach(lif->netdev);
+
        return err;
 }
 
index 4b004a7..99df00c 100644 (file)
@@ -176,6 +176,15 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
 }
 #endif
 
+static int __maybe_unused qede_suspend(struct device *dev)
+{
+       dev_info(dev, "Device does not support suspend operation\n");
+
+       return -EOPNOTSUPP;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(qede_pm_ops, qede_suspend, NULL);
+
 static const struct pci_error_handlers qede_err_handler = {
        .error_detected = qede_io_error_detected,
 };
@@ -190,6 +199,7 @@ static struct pci_driver qede_pci_driver = {
        .sriov_configure = qede_sriov_configure,
 #endif
        .err_handler = &qede_err_handler,
+       .driver.pm = &qede_pm_ops,
 };
 
 static struct qed_eth_cb_ops qede_ll_ops = {
index 7adde96..35d8e98 100644 (file)
@@ -1194,7 +1194,7 @@ int ef100_probe_netdev_pf(struct efx_nic *efx)
                net_dev->features |= NETIF_F_HW_TC;
                efx->fixed_features |= NETIF_F_HW_TC;
        }
-       return rc;
+       return 0;
 }
 
 int ef100_probe_vf(struct efx_nic *efx)
index cf1d67b..c3dc88e 100644 (file)
@@ -428,7 +428,7 @@ static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue)
        for (i = 0; i < state->packet_count; i++) {
                /* Allocate an skb, holding an extra reference for
                 * transmit completion counting */
-               skb = alloc_skb(EF4_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL);
+               skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
                if (!skb)
                        return -ENOMEM;
                state->skbs[i] = skb;
index 19a0b85..563c1e3 100644 (file)
@@ -426,7 +426,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
        for (i = 0; i < state->packet_count; i++) {
                /* Allocate an skb, holding an extra reference for
                 * transmit completion counting */
-               skb = alloc_skb(EFX_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL);
+               skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
                if (!skb)
                        return -ENOMEM;
                state->skbs[i] = skb;
index b55fd33..526da43 100644 (file)
@@ -426,7 +426,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
        for (i = 0; i < state->packet_count; i++) {
                /* Allocate an skb, holding an extra reference for
                 * transmit completion counting */
-               skb = alloc_skb(EFX_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL);
+               skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
                if (!skb)
                        return -ENOMEM;
                state->skbs[i] = skb;
index 15ebd39..fe268b6 100644 (file)
@@ -1657,10 +1657,10 @@ int efx_init_tc(struct efx_nic *efx)
        rc = efx_tc_configure_fallback_acts_reps(efx);
        if (rc)
                return rc;
-       efx->tc->up = true;
        rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
        if (rc)
                return rc;
+       efx->tc->up = true;
        return 0;
 }
 
index b15dd9a..1b55928 100644 (file)
@@ -748,7 +748,8 @@ static int ipvlan_device_event(struct notifier_block *unused,
 
                write_pnet(&port->pnet, newnet);
 
-               ipvlan_migrate_l3s_hook(oldnet, newnet);
+               if (port->mode == IPVLAN_MODE_L3S)
+                       ipvlan_migrate_l3s_hook(oldnet, newnet);
                break;
        }
        case NETDEV_UNREGISTER:
index 984dfa5..144ec75 100644 (file)
@@ -743,7 +743,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
                u64_stats_update_begin(&rxsc_stats->syncp);
                rxsc_stats->stats.InPktsLate++;
                u64_stats_update_end(&rxsc_stats->syncp);
-               secy->netdev->stats.rx_dropped++;
+               DEV_STATS_INC(secy->netdev, rx_dropped);
                return false;
        }
 
@@ -767,7 +767,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
                        rxsc_stats->stats.InPktsNotValid++;
                        u64_stats_update_end(&rxsc_stats->syncp);
                        this_cpu_inc(rx_sa->stats->InPktsNotValid);
-                       secy->netdev->stats.rx_errors++;
+                       DEV_STATS_INC(secy->netdev, rx_errors);
                        return false;
                }
 
@@ -1069,7 +1069,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
                        u64_stats_update_begin(&secy_stats->syncp);
                        secy_stats->stats.InPktsNoTag++;
                        u64_stats_update_end(&secy_stats->syncp);
-                       macsec->secy.netdev->stats.rx_dropped++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
                        continue;
                }
 
@@ -1179,7 +1179,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
                u64_stats_update_begin(&secy_stats->syncp);
                secy_stats->stats.InPktsBadTag++;
                u64_stats_update_end(&secy_stats->syncp);
-               secy->netdev->stats.rx_errors++;
+               DEV_STATS_INC(secy->netdev, rx_errors);
                goto drop_nosa;
        }
 
@@ -1196,7 +1196,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
                        u64_stats_update_begin(&rxsc_stats->syncp);
                        rxsc_stats->stats.InPktsNotUsingSA++;
                        u64_stats_update_end(&rxsc_stats->syncp);
-                       secy->netdev->stats.rx_errors++;
+                       DEV_STATS_INC(secy->netdev, rx_errors);
                        if (active_rx_sa)
                                this_cpu_inc(active_rx_sa->stats->InPktsNotUsingSA);
                        goto drop_nosa;
@@ -1230,7 +1230,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
                        u64_stats_update_begin(&rxsc_stats->syncp);
                        rxsc_stats->stats.InPktsLate++;
                        u64_stats_update_end(&rxsc_stats->syncp);
-                       macsec->secy.netdev->stats.rx_dropped++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
                        goto drop;
                }
        }
@@ -1271,7 +1271,7 @@ deliver:
        if (ret == NET_RX_SUCCESS)
                count_rx(dev, len);
        else
-               macsec->secy.netdev->stats.rx_dropped++;
+               DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
 
        rcu_read_unlock();
 
@@ -1308,7 +1308,7 @@ nosci:
                        u64_stats_update_begin(&secy_stats->syncp);
                        secy_stats->stats.InPktsNoSCI++;
                        u64_stats_update_end(&secy_stats->syncp);
-                       macsec->secy.netdev->stats.rx_errors++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_errors);
                        continue;
                }
 
@@ -1327,7 +1327,7 @@ nosci:
                        secy_stats->stats.InPktsUnknownSCI++;
                        u64_stats_update_end(&secy_stats->syncp);
                } else {
-                       macsec->secy.netdev->stats.rx_dropped++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
                }
        }
 
@@ -3422,7 +3422,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 
        if (!secy->operational) {
                kfree_skb(skb);
-               dev->stats.tx_dropped++;
+               DEV_STATS_INC(dev, tx_dropped);
                return NETDEV_TX_OK;
        }
 
@@ -3430,7 +3430,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
        skb = macsec_encrypt(skb, dev);
        if (IS_ERR(skb)) {
                if (PTR_ERR(skb) != -EINPROGRESS)
-                       dev->stats.tx_dropped++;
+                       DEV_STATS_INC(dev, tx_dropped);
                return NETDEV_TX_OK;
        }
 
@@ -3667,9 +3667,9 @@ static void macsec_get_stats64(struct net_device *dev,
 
        dev_fetch_sw_netstats(s, dev->tstats);
 
-       s->rx_dropped = dev->stats.rx_dropped;
-       s->tx_dropped = dev->stats.tx_dropped;
-       s->rx_errors = dev->stats.rx_errors;
+       s->rx_dropped = atomic_long_read(&dev->stats.__rx_dropped);
+       s->tx_dropped = atomic_long_read(&dev->stats.__tx_dropped);
+       s->rx_errors = atomic_long_read(&dev->stats.__rx_errors);
 }
 
 static int macsec_get_iflink(const struct net_device *dev)
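
The macsec conversion replaces plain ++ on shared error counters with DEV_STATS_INC() and reads the totals back atomically in macsec_get_stats64(). A rough user-space analog of that pattern using C11 atomics (struct and macro names here are illustrative, not the kernel API):

    #include <stdatomic.h>
    #include <stdio.h>

    struct dev_stats_sketch {
            atomic_long rx_dropped;
            atomic_long tx_dropped;
            atomic_long rx_errors;
    };

    /* a relaxed add is enough: only the count matters, not ordering */
    #define DEV_STATS_INC_SKETCH(s, f) \
            atomic_fetch_add_explicit(&(s)->f, 1, memory_order_relaxed)

    int main(void)
    {
            struct dev_stats_sketch st = { 0 };

            DEV_STATS_INC_SKETCH(&st, rx_dropped);  /* e.g. packet arrived late */
            DEV_STATS_INC_SKETCH(&st, rx_errors);   /* e.g. bad SecTAG */

            printf("rx_dropped=%ld rx_errors=%ld\n",
                   atomic_load(&st.rx_dropped), atomic_load(&st.rx_errors));
            return 0;
    }
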
index b839325..81b7748 100644 (file)
@@ -186,7 +186,7 @@ int mdiobb_read_c45(struct mii_bus *bus, int phy, int devad, int reg)
        struct mdiobb_ctrl *ctrl = bus->priv;
 
        mdiobb_cmd_addr(ctrl, phy, devad, reg);
-       mdiobb_cmd(ctrl, MDIO_C45_READ, phy, reg);
+       mdiobb_cmd(ctrl, MDIO_C45_READ, phy, devad);
 
        return mdiobb_read_common(bus, phy);
 }
@@ -222,7 +222,7 @@ int mdiobb_write_c45(struct mii_bus *bus, int phy, int devad, int reg, u16 val)
        struct mdiobb_ctrl *ctrl = bus->priv;
 
        mdiobb_cmd_addr(ctrl, phy, devad, reg);
-       mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, reg);
+       mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, devad);
 
        return mdiobb_write_common(bus, val);
 }
index 323bec5..3560991 100644 (file)
@@ -313,15 +313,21 @@ struct phylink_pcs *miic_create(struct device *dev, struct device_node *np)
 
        pdev = of_find_device_by_node(pcs_np);
        of_node_put(pcs_np);
-       if (!pdev || !platform_get_drvdata(pdev))
+       if (!pdev || !platform_get_drvdata(pdev)) {
+               if (pdev)
+                       put_device(&pdev->dev);
                return ERR_PTR(-EPROBE_DEFER);
+       }
 
        miic_port = kzalloc(sizeof(*miic_port), GFP_KERNEL);
-       if (!miic_port)
+       if (!miic_port) {
+               put_device(&pdev->dev);
                return ERR_PTR(-ENOMEM);
+       }
 
        miic = platform_get_drvdata(pdev);
        device_link_add(dev, miic->dev, DL_FLAG_AUTOREMOVE_CONSUMER);
+       put_device(&pdev->dev);
 
        miic_port->miic = miic;
        miic_port->port = port - 1;
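
The miic_create() change balances the reference taken by of_find_device_by_node() on every exit path, including the early error returns. The same discipline in plain C, with a toy refcount standing in for the device reference (helper names hypothetical):

    #include <stdlib.h>

    struct obj { int refcnt; };

    static struct obj *obj_get(struct obj *o) { if (o) o->refcnt++; return o; }
    static void obj_put(struct obj *o) { if (o && --o->refcnt == 0) free(o); }

    static int consume(struct obj *dev)
    {
            struct obj *o = obj_get(dev);   /* like a lookup returning a reference */
            char *port = malloc(16);

            if (!port) {
                    obj_put(o);             /* the error path drops the reference too */
                    return -1;
            }
            /* ... use the data behind o ... */
            free(port);
            obj_put(o);                     /* done with the lookup reference */
            return 0;
    }

    int main(void)
    {
            struct obj *dev = calloc(1, sizeof(*dev));

            if (!dev)
                    return 1;
            dev->refcnt = 1;
            consume(dev);
            obj_put(dev);
            return 0;
    }
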
index c1f307d..8a77ec3 100644 (file)
@@ -459,21 +459,27 @@ static int at803x_set_wol(struct phy_device *phydev,
                        phy_write_mmd(phydev, MDIO_MMD_PCS, offsets[i],
                                      mac[(i * 2) + 1] | (mac[(i * 2)] << 8));
 
-               /* Enable WOL function */
-               ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
-                               0, AT803X_WOL_EN);
-               if (ret)
-                       return ret;
+               /* Enable WOL function for 1588 */
+               if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+                       ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+                                            AT803X_PHY_MMD3_WOL_CTRL,
+                                            0, AT803X_WOL_EN);
+                       if (ret)
+                               return ret;
+               }
                /* Enable WOL interrupt */
                ret = phy_modify(phydev, AT803X_INTR_ENABLE, 0, AT803X_INTR_ENABLE_WOL);
                if (ret)
                        return ret;
        } else {
-               /* Disable WoL function */
-               ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
-                               AT803X_WOL_EN, 0);
-               if (ret)
-                       return ret;
+               /* Disable WoL function for 1588 */
+               if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+                       ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+                                            AT803X_PHY_MMD3_WOL_CTRL,
+                                            AT803X_WOL_EN, 0);
+                       if (ret)
+                               return ret;
+               }
                /* Disable WOL interrupt */
                ret = phy_modify(phydev, AT803X_INTR_ENABLE, AT803X_INTR_ENABLE_WOL, 0);
                if (ret)
@@ -508,11 +514,11 @@ static void at803x_get_wol(struct phy_device *phydev,
        wol->supported = WAKE_MAGIC;
        wol->wolopts = 0;
 
-       value = phy_read_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL);
+       value = phy_read(phydev, AT803X_INTR_ENABLE);
        if (value < 0)
                return;
 
-       if (value & AT803X_WOL_EN)
+       if (value & AT803X_INTR_ENABLE_WOL)
                wol->wolopts |= WAKE_MAGIC;
 }
 
@@ -858,9 +864,6 @@ static int at803x_probe(struct phy_device *phydev)
        if (phydev->drv->phy_id == ATH8031_PHY_ID) {
                int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
                int mode_cfg;
-               struct ethtool_wolinfo wol = {
-                       .wolopts = 0,
-               };
 
                if (ccr < 0)
                        return ccr;
@@ -877,12 +880,14 @@ static int at803x_probe(struct phy_device *phydev)
                        break;
                }
 
-               /* Disable WOL by default */
-               ret = at803x_set_wol(phydev, &wol);
-               if (ret < 0) {
-                       phydev_err(phydev, "failed to disable WOL on probe: %d\n", ret);
+               /* Disable WoL in 1588 register which is enabled
+                * by default
+                */
+               ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+                                    AT803X_PHY_MMD3_WOL_CTRL,
+                                    AT803X_WOL_EN, 0);
+               if (ret)
                        return ret;
-               }
        }
 
        return 0;
@@ -2059,8 +2064,6 @@ static struct phy_driver at803x_driver[] = {
        .flags                  = PHY_POLL_CABLE_TEST,
        .config_init            = at803x_config_init,
        .link_change_notify     = at803x_link_change_notify,
-       .set_wol                = at803x_set_wol,
-       .get_wol                = at803x_get_wol,
        .suspend                = at803x_suspend,
        .resume                 = at803x_resume,
        /* PHY_BASIC_FEATURES */
index 59cae0d..04b2e6e 100644 (file)
@@ -542,6 +542,17 @@ static int bcm54xx_resume(struct phy_device *phydev)
        return bcm54xx_config_init(phydev);
 }
 
+static int bcm54810_read_mmd(struct phy_device *phydev, int devnum, u16 regnum)
+{
+       return -EOPNOTSUPP;
+}
+
+static int bcm54810_write_mmd(struct phy_device *phydev, int devnum, u16 regnum,
+                             u16 val)
+{
+       return -EOPNOTSUPP;
+}
+
 static int bcm54811_config_init(struct phy_device *phydev)
 {
        int err, reg;
@@ -1103,6 +1114,8 @@ static struct phy_driver broadcom_drivers[] = {
        .get_strings    = bcm_phy_get_strings,
        .get_stats      = bcm54xx_get_stats,
        .probe          = bcm54xx_phy_probe,
+       .read_mmd       = bcm54810_read_mmd,
+       .write_mmd      = bcm54810_write_mmd,
        .config_init    = bcm54xx_config_init,
        .config_aneg    = bcm5481_config_aneg,
        .config_intr    = bcm_phy_config_intr,
index bdf00b2..a9ecfdd 100644 (file)
@@ -1184,9 +1184,11 @@ void phy_stop_machine(struct phy_device *phydev)
 
 static void phy_process_error(struct phy_device *phydev)
 {
-       mutex_lock(&phydev->lock);
+       /* phydev->lock must be held for the state change to be safe */
+       if (!mutex_is_locked(&phydev->lock))
+               phydev_err(phydev, "PHY-device data unsafe context\n");
+
        phydev->state = PHY_ERROR;
-       mutex_unlock(&phydev->lock);
 
        phy_trigger_machine(phydev);
 }
@@ -1195,7 +1197,9 @@ static void phy_error_precise(struct phy_device *phydev,
                              const void *func, int err)
 {
        WARN(1, "%pS: returned: %d\n", func, err);
+       mutex_lock(&phydev->lock);
        phy_process_error(phydev);
+       mutex_unlock(&phydev->lock);
 }
 
 /**
@@ -1204,8 +1208,7 @@ static void phy_error_precise(struct phy_device *phydev,
  *
  * Moves the PHY to the ERROR state in response to a read
  * or write error, and tells the controller the link is down.
- * Must not be called from interrupt context, or while the
- * phydev->lock is held.
+ * Must be called with phydev->lock held.
  */
 void phy_error(struct phy_device *phydev)
 {
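
The phy_error() rework inverts the locking contract: the helper no longer takes phydev->lock itself, the caller must already hold it, and phy_process_error() only complains if that is violated. A minimal pthread sketch of the same convention; the trylock check is only a rough analog of the kernel's mutex_is_locked(), and all names are illustrative:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int state;

    /* caller must hold 'lock'; warn instead of taking it here */
    static void process_error_locked(void)
    {
            if (pthread_mutex_trylock(&lock) == 0) {
                    fprintf(stderr, "called without the lock held\n");
                    pthread_mutex_unlock(&lock);
            }
            state = -1;
    }

    static void report_error(void)
    {
            pthread_mutex_lock(&lock);      /* contract enforced by the caller */
            process_error_locked();
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            report_error();
            printf("state=%d\n", state);
            return 0;
    }
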
index 61921d4..c7cf61f 100644 (file)
@@ -3216,6 +3216,8 @@ static int phy_probe(struct device *dev)
                        goto out;
        }
 
+       phy_disable_interrupts(phydev);
+
        /* Start out supporting everything. Eventually,
         * a controller will attach, and may modify one
         * or both of these values
@@ -3333,16 +3335,6 @@ static int phy_remove(struct device *dev)
        return 0;
 }
 
-static void phy_shutdown(struct device *dev)
-{
-       struct phy_device *phydev = to_phy_device(dev);
-
-       if (phydev->state == PHY_READY || !phydev->attached_dev)
-               return;
-
-       phy_disable_interrupts(phydev);
-}
-
 /**
  * phy_driver_register - register a phy_driver with the PHY layer
  * @new_driver: new phy_driver to register
@@ -3376,7 +3368,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
        new_driver->mdiodrv.driver.bus = &mdio_bus_type;
        new_driver->mdiodrv.driver.probe = phy_probe;
        new_driver->mdiodrv.driver.remove = phy_remove;
-       new_driver->mdiodrv.driver.shutdown = phy_shutdown;
        new_driver->mdiodrv.driver.owner = owner;
        new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
 
index e8dd47b..208a939 100644 (file)
@@ -258,6 +258,16 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
        switch (id->base.extended_cc) {
        case SFF8024_ECC_UNSPEC:
                break;
+       case SFF8024_ECC_100G_25GAUI_C2M_AOC:
+               if (br_min <= 28000 && br_max >= 25000) {
+                       /* 25GBASE-R, possibly with FEC */
+                       __set_bit(PHY_INTERFACE_MODE_25GBASER, interfaces);
+                       /* There is currently no link mode for 25000base
+                        * with unspecified range, reuse SR.
+                        */
+                       phylink_set(modes, 25000baseSR_Full);
+               }
+               break;
        case SFF8024_ECC_100GBASE_SR4_25GBASE_SR:
                phylink_set(modes, 100000baseSR4_Full);
                phylink_set(modes, 25000baseSR_Full);
index d3dc225..382756c 100644 (file)
@@ -2200,7 +2200,9 @@ static void team_setup(struct net_device *dev)
 
        dev->hw_features = TEAM_VLAN_FEATURES |
                           NETIF_F_HW_VLAN_CTAG_RX |
-                          NETIF_F_HW_VLAN_CTAG_FILTER;
+                          NETIF_F_HW_VLAN_CTAG_FILTER |
+                          NETIF_F_HW_VLAN_STAG_RX |
+                          NETIF_F_HW_VLAN_STAG_FILTER;
 
        dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
        dev->features |= dev->hw_features;
index 25f0191..100339b 100644 (file)
@@ -1594,7 +1594,7 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
        if (zerocopy)
                return false;
 
-       if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+       if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) +
            SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
                return false;
 
index 614f3e3..ef8eacb 100644 (file)
@@ -1081,8 +1081,9 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 err_xdp_ring:
        for (i--; i >= start; i--)
                ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+       i = end;
 err_page_pool:
-       for (i = start; i < end; i++) {
+       for (i--; i >= start; i--) {
                page_pool_destroy(priv->rq[i].page_pool);
                priv->rq[i].page_pool = NULL;
        }
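
This veth hunk fixes the unwind order: on failure the loop must walk backwards over exactly the entries that were initialised, and resetting i to end lets the page-pool teardown reuse the same reverse idiom. A compact sketch of that error-unwind shape (illustrative only):

    #include <stdlib.h>

    static int setup_all(void **slots, int start, int end)
    {
            int i;

            for (i = start; i < end; i++) {
                    slots[i] = malloc(32);
                    if (!slots[i])
                            goto err;
            }
            return 0;

    err:
            /* tear down only what was set up, newest first */
            for (i--; i >= start; i--) {
                    free(slots[i]);
                    slots[i] = NULL;
            }
            return -1;
    }

    int main(void)
    {
            void *slots[8] = { 0 };
            int i, ret = setup_all(slots, 0, 8);

            for (i = 0; i < 8; i++)
                    free(slots[i]);
            return ret ? 1 : 0;
    }
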
@@ -1860,10 +1861,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 
                nla_peer = data[VETH_INFO_PEER];
                ifmp = nla_data(nla_peer);
-               err = rtnl_nla_parse_ifla(peer_tb,
-                                         nla_data(nla_peer) + sizeof(struct ifinfomsg),
-                                         nla_len(nla_peer) - sizeof(struct ifinfomsg),
-                                         NULL);
+               err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
                if (err < 0)
                        return err;
 
index 1270c8d..8e9f4cf 100644 (file)
@@ -2761,7 +2761,7 @@ static void virtnet_init_default_rss(struct virtnet_info *vi)
                vi->ctrl->rss.indirection_table[i] = indir_val;
        }
 
-       vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs;
+       vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0;
        vi->ctrl->rss.hash_key_length = vi->rss_key_size;
 
        netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
@@ -4219,8 +4219,6 @@ static int virtnet_probe(struct virtio_device *vdev)
        if (vi->has_rss || vi->has_rss_hash_report)
                virtnet_init_default_rss(vi);
 
-       _virtnet_set_queues(vi, vi->curr_queue_pairs);
-
        /* serialize netdev register + virtio_device_ready() with ndo_open() */
        rtnl_lock();
 
@@ -4233,6 +4231,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 
        virtio_device_ready(vdev);
 
+       _virtnet_set_queues(vi, vi->curr_queue_pairs);
+
        /* a random MAC address has been assigned, notify the device.
         * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
         * because many devices work fine without getting MAC explicitly
index a3de081..c3ff30a 100644 (file)
@@ -713,6 +713,12 @@ static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
        return vninode;
 }
 
+static void vxlan_vni_free(struct vxlan_vni_node *vninode)
+{
+       free_percpu(vninode->stats);
+       kfree(vninode);
+}
+
 static int vxlan_vni_add(struct vxlan_dev *vxlan,
                         struct vxlan_vni_group *vg,
                         u32 vni, union vxlan_addr *group,
@@ -740,7 +746,7 @@ static int vxlan_vni_add(struct vxlan_dev *vxlan,
                                            &vninode->vnode,
                                            vxlan_vni_rht_params);
        if (err) {
-               kfree(vninode);
+               vxlan_vni_free(vninode);
                return err;
        }
 
@@ -763,8 +769,7 @@ static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
        struct vxlan_vni_node *v;
 
        v = container_of(rcu, struct vxlan_vni_node, rcu);
-       free_percpu(v->stats);
-       kfree(v);
+       vxlan_vni_free(v);
 }
 
 static int vxlan_vni_del(struct vxlan_dev *vxlan,
index 5bf7822..0ba714c 100644 (file)
@@ -6,7 +6,7 @@
 #include "allowedips.h"
 #include "peer.h"
 
-enum { MAX_ALLOWEDIPS_BITS = 128 };
+enum { MAX_ALLOWEDIPS_DEPTH = 129 };
 
 static struct kmem_cache *node_cache;
 
@@ -42,7 +42,7 @@ static void push_rcu(struct allowedips_node **stack,
                     struct allowedips_node __rcu *p, unsigned int *len)
 {
        if (rcu_access_pointer(p)) {
-               if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_BITS))
+               if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_DEPTH))
                        return;
                stack[(*len)++] = rcu_dereference_raw(p);
        }
@@ -55,7 +55,7 @@ static void node_free_rcu(struct rcu_head *rcu)
 
 static void root_free_rcu(struct rcu_head *rcu)
 {
-       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = {
+       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = {
                container_of(rcu, struct allowedips_node, rcu) };
        unsigned int len = 1;
 
@@ -68,7 +68,7 @@ static void root_free_rcu(struct rcu_head *rcu)
 
 static void root_remove_peer_lists(struct allowedips_node *root)
 {
-       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = { root };
+       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = { root };
        unsigned int len = 1;
 
        while (len > 0 && (node = stack[--len])) {
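
The rename from MAX_ALLOWEDIPS_BITS to MAX_ALLOWEDIPS_DEPTH reflects that a walk of the 128-bit trie can visit one node per decided bit plus the root, so the on-stack DFS array needs 129 slots rather than 128. The sizing as a plain assertion (not the driver code):

    #include <assert.h>

    enum { KEY_BITS = 128 };
    enum { MAX_DEPTH = KEY_BITS + 1 };      /* root node + one per decided bit */

    int main(void)
    {
            assert(MAX_DEPTH == 129);
            return 0;
    }
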
index 78ebe28..3d1f64f 100644 (file)
@@ -593,16 +593,20 @@ bool __init wg_allowedips_selftest(void)
        wg_allowedips_remove_by_peer(&t, a, &mutex);
        test_negative(4, a, 192, 168, 0, 1);
 
-       /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_BITS) in free_node
+       /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_DEPTH) in free_node
         * if something goes wrong.
         */
-       for (i = 0; i < MAX_ALLOWEDIPS_BITS; ++i) {
-               part = cpu_to_be64(~(1LLU << (i % 64)));
-               memset(&ip, 0xff, 16);
-               memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
+       for (i = 0; i < 64; ++i) {
+               part = cpu_to_be64(~0LLU << i);
+               memset(&ip, 0xff, 8);
+               memcpy((u8 *)&ip + 8, &part, 8);
+               wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+               memcpy(&ip, &part, 8);
+               memset((u8 *)&ip + 8, 0, 8);
                wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
        }
-
+       memset(&ip, 0, 16);
+       wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
        wg_allowedips_free(&t, &mutex);
 
        wg_allowedips_init(&t);
index 6512267..4928e4e 100644 (file)
@@ -2144,8 +2144,7 @@ int ath12k_wmi_send_scan_start_cmd(struct ath12k *ar,
        struct wmi_tlv *tlv;
        void *ptr;
        int i, ret, len;
-       u32 *tmp_ptr;
-       u8 extraie_len_with_pad = 0;
+       u32 *tmp_ptr, extraie_len_with_pad = 0;
        struct ath12k_wmi_hint_short_ssid_arg *s_ssid = NULL;
        struct ath12k_wmi_hint_bssid_arg *hint_bssid = NULL;
 
index de8a2e2..2a90bb2 100644 (file)
@@ -1456,6 +1456,10 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
                params_size -= BRCMF_SCAN_PARAMS_V2_FIXED_SIZE;
                params_size += BRCMF_SCAN_PARAMS_FIXED_SIZE;
                params_v1 = kzalloc(params_size, GFP_KERNEL);
+               if (!params_v1) {
+                       err = -ENOMEM;
+                       goto exit_params;
+               }
                params_v1->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION);
                brcmf_scan_params_v2_to_v1(&params->params_v2_le, &params_v1->params_le);
                kfree(params);
@@ -1473,6 +1477,7 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
                        bphy_err(drvr, "error (%d)\n", err);
        }
 
+exit_params:
        kfree(params);
 exit:
        return err;
index b20409f..2097130 100644 (file)
@@ -66,6 +66,7 @@ config IWLMVM
        tristate "Intel Wireless WiFi MVM Firmware support"
        select WANT_DEV_COREDUMP
        depends on MAC80211
+       depends on PTP_1588_CLOCK_OPTIONAL
        help
          This is the driver that supports the MVM firmware. The list
          of the devices that use this firmware is available here:
index b114bab..c93e625 100644 (file)
@@ -2524,7 +2524,7 @@ static int cmac_dma_init(struct rtw89_dev *rtwdev, u8 mac_idx)
        u32 reg;
        int ret;
 
-       if (chip_id != RTL8852A && chip_id != RTL8852B)
+       if (chip_id != RTL8852B)
                return 0;
 
        ret = rtw89_mac_check_mac_en(rtwdev, mac_idx, RTW89_CMAC_SEL);
index c8d20cd..88f760a 100644 (file)
@@ -396,7 +396,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
        struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
        struct xen_netif_tx_request *txp = first;
 
-       nr_slots = shinfo->nr_frags + 1;
+       nr_slots = shinfo->nr_frags + frag_overflow + 1;
 
        copy_count(skb) = 0;
        XENVIF_TX_CB(skb)->split_mask = 0;
@@ -462,8 +462,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
                }
        }
 
-       for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
-            shinfo->nr_frags++, gop++) {
+       for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
+            shinfo->nr_frags++, gop++, nr_slots--) {
                index = pending_index(queue->pending_cons++);
                pending_idx = queue->pending_ring[index];
                xenvif_tx_create_map_op(queue, pending_idx, txp,
@@ -476,12 +476,12 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
                        txp++;
        }
 
-       if (frag_overflow) {
+       if (nr_slots > 0) {
 
                shinfo = skb_shinfo(nskb);
                frags = shinfo->frags;
 
-               for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
+               for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
                     shinfo->nr_frags++, txp++, gop++) {
                        index = pending_index(queue->pending_cons++);
                        pending_idx = queue->pending_ring[index];
@@ -492,6 +492,11 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
                }
 
                skb_shinfo(skb)->frag_list = nskb;
+       } else if (nskb) {
+               /* A frag_list skb was allocated but it is no longer needed
+                * because enough slots were converted to copy ops above.
+                */
+               kfree_skb(nskb);
        }
 
        (*copy_ops) = cop - queue->tx_copy_ops;
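
The last xen-netback hunk plugs a leak: the frag_list skb is allocated up front from a pessimistic slot estimate, but may turn out to be unnecessary once enough slots were handled as copies, so the unused allocation has to be freed. The shape of that speculative-allocation pattern in plain C (names hypothetical):

    #include <stdlib.h>
    #include <stdio.h>

    int main(void)
    {
            int estimated_extra_slots = 3;          /* computed before the copy pass */
            int remaining_after_copies = 0;         /* the copy pass consumed everything */
            void *nskb = NULL;

            if (estimated_extra_slots) {
                    nskb = malloc(64);              /* speculative allocation */
                    if (!nskb)
                            return 1;
            }

            if (remaining_after_copies > 0) {
                    puts("extra buffer really needed; ownership moves on");
            } else {
                    free(nskb);                     /* speculation unused: free, don't leak */
                    nskb = NULL;
            }
            return 0;
    }
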
index 37b6fa7..f3a01b7 100644 (file)
@@ -3933,6 +3933,12 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
         */
        nvme_mpath_clear_ctrl_paths(ctrl);
 
+       /*
+        * Unquiesce io queues so any pending IO won't hang, especially
+        * those submitted from scan work
+        */
+       nvme_unquiesce_io_queues(ctrl);
+
        /* prevent racing with ns scanning */
        flush_work(&ctrl->scan_work);
 
@@ -3942,10 +3948,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
         * removing the namespaces' disks; fail all the queues now to avoid
         * potentially having to clean up the failed sync later.
         */
-       if (ctrl->state == NVME_CTRL_DEAD) {
+       if (ctrl->state == NVME_CTRL_DEAD)
                nvme_mark_namespaces_dead(ctrl);
-               nvme_unquiesce_io_queues(ctrl);
-       }
 
        /* this is a no-op when called from the controller reset handler */
        nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
index 5c3250f..d39f321 100644 (file)
@@ -786,11 +786,9 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
        if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
                return 0;
 
-       rcu_read_lock();
        req = READ_ONCE(ioucmd->cookie);
        if (req && blk_rq_is_poll(req))
                ret = blk_rq_poll(req, iob, poll_flags);
-       rcu_read_unlock();
        return ret;
 }
 #ifdef CONFIG_NVME_MULTIPATH
index baf69af..2f57da1 100644 (file)
@@ -3402,7 +3402,8 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_DEVICE(0x1d97, 0x2263),   /* SPCC */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(0x144d, 0xa80b),   /* Samsung PM9B1 256G and 512G */
-               .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+               .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES |
+                               NVME_QUIRK_BOGUS_NID, },
        { PCI_DEVICE(0x144d, 0xa809),   /* Samsung MZALQ256HBJD 256G */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(0x144d, 0xa802),   /* Samsung SM953 */
index d433b2e..337a624 100644 (file)
@@ -883,6 +883,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
                goto out_cleanup_tagset;
 
        if (!new) {
+               nvme_start_freeze(&ctrl->ctrl);
                nvme_unquiesce_io_queues(&ctrl->ctrl);
                if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
                        /*
@@ -891,6 +892,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
                         * to be safe.
                         */
                        ret = -ENODEV;
+                       nvme_unfreeze(&ctrl->ctrl);
                        goto out_wait_freeze_timed_out;
                }
                blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
@@ -940,7 +942,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
        if (ctrl->ctrl.queue_count > 1) {
-               nvme_start_freeze(&ctrl->ctrl);
                nvme_quiesce_io_queues(&ctrl->ctrl);
                nvme_sync_io_queues(&ctrl->ctrl);
                nvme_rdma_stop_io_queues(ctrl);
index 9ce417c..5b332d9 100644 (file)
@@ -1868,6 +1868,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
                goto out_cleanup_connect_q;
 
        if (!new) {
+               nvme_start_freeze(ctrl);
                nvme_unquiesce_io_queues(ctrl);
                if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
                        /*
@@ -1876,6 +1877,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
                         * to be safe.
                         */
                        ret = -ENODEV;
+                       nvme_unfreeze(ctrl);
                        goto out_wait_freeze_timed_out;
                }
                blk_mq_update_nr_hw_queues(ctrl->tagset,
@@ -1980,7 +1982,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
        if (ctrl->queue_count <= 1)
                return;
        nvme_quiesce_admin_queue(ctrl);
-       nvme_start_freeze(ctrl);
        nvme_quiesce_io_queues(ctrl);
        nvme_sync_io_queues(ctrl);
        nvme_tcp_stop_io_queues(ctrl);
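
Both the RDMA and TCP transports move nvme_start_freeze() out of the teardown path and into the configure path, so the freeze and its matching unfreeze live in one function and the error branch can undo it locally. A generic sketch of keeping such begin/end pairs on the same path (names illustrative, not the NVMe API):

    #include <stdio.h>

    static int frozen;

    static void start_freeze(void) { frozen = 1; }
    static void unfreeze(void)     { frozen = 0; }

    static int wait_ready(int ok)  { return ok ? 0 : -1; }

    static int configure_queues(int ready)
    {
            start_freeze();                 /* begin ... */
            if (wait_ready(ready) < 0) {
                    unfreeze();             /* ... undone on the same path on error */
                    return -1;
            }
            /* resize/update while frozen */
            unfreeze();                     /* ... and released on success */
            return 0;
    }

    int main(void)
    {
            printf("ok=%d frozen=%d\n", configure_queues(1), frozen);
            printf("err=%d frozen=%d\n", configure_queues(0), frozen);
            return 0;
    }
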
index e311d40..4999636 100644 (file)
@@ -63,15 +63,14 @@ int of_reconfig_notifier_unregister(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
 
-#ifdef DEBUG
-const char *action_names[] = {
+static const char *action_names[] = {
+       [0] = "INVALID",
        [OF_RECONFIG_ATTACH_NODE] = "ATTACH_NODE",
        [OF_RECONFIG_DETACH_NODE] = "DETACH_NODE",
        [OF_RECONFIG_ADD_PROPERTY] = "ADD_PROPERTY",
        [OF_RECONFIG_REMOVE_PROPERTY] = "REMOVE_PROPERTY",
        [OF_RECONFIG_UPDATE_PROPERTY] = "UPDATE_PROPERTY",
 };
-#endif
 
 int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p)
 {
@@ -620,21 +619,9 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
                }
 
                ret = __of_add_property(ce->np, ce->prop);
-               if (ret) {
-                       pr_err("changeset: add_property failed @%pOF/%s\n",
-                               ce->np,
-                               ce->prop->name);
-                       break;
-               }
                break;
        case OF_RECONFIG_REMOVE_PROPERTY:
                ret = __of_remove_property(ce->np, ce->prop);
-               if (ret) {
-                       pr_err("changeset: remove_property failed @%pOF/%s\n",
-                               ce->np,
-                               ce->prop->name);
-                       break;
-               }
                break;
 
        case OF_RECONFIG_UPDATE_PROPERTY:
@@ -648,20 +635,17 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
                }
 
                ret = __of_update_property(ce->np, ce->prop, &old_prop);
-               if (ret) {
-                       pr_err("changeset: update_property failed @%pOF/%s\n",
-                               ce->np,
-                               ce->prop->name);
-                       break;
-               }
                break;
        default:
                ret = -EINVAL;
        }
        raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-       if (ret)
+       if (ret) {
+               pr_err("changeset: apply failed: %-15s %pOF:%s\n",
+                      action_names[ce->action], ce->np, ce->prop->name);
                return ret;
+       }
 
        switch (ce->action) {
        case OF_RECONFIG_ATTACH_NODE:
@@ -947,6 +931,9 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action,
        if (!ce)
                return -ENOMEM;
 
+       if (WARN_ON(action >= ARRAY_SIZE(action_names)))
+               return -EINVAL;
+
        /* get a reference to the node */
        ce->action = action;
        ce->np = of_node_get(np);
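
The of/dynamic change drops the per-case printks, keeps one name table indexed by the action value, emits a single uniform error message after the switch, and range-checks the action before using it. An illustrative version of that pattern (enum values and message text hypothetical):

    #include <stdio.h>

    enum action { ACT_INVALID, ACT_ATTACH, ACT_DETACH, ACT_NACTIONS };

    static const char *action_names[] = {
            [ACT_INVALID] = "INVALID",
            [ACT_ATTACH]  = "ATTACH_NODE",
            [ACT_DETACH]  = "DETACH_NODE",
    };

    static int apply(enum action a)
    {
            int ret;

            if (a >= ACT_NACTIONS)          /* mirrors the new range check */
                    return -22;

            switch (a) {
            case ACT_ATTACH: ret = 0;  break;
            case ACT_DETACH: ret = -1; break;       /* pretend this one fails */
            default:         ret = -22; break;
            }

            if (ret)                        /* one message instead of one per case */
                    fprintf(stderr, "changeset: apply failed: %s (%d)\n",
                            action_names[a], ret);
            return ret;
    }

    int main(void) { return apply(ACT_DETACH) ? 1 : 0; }
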
index f26d2ba..6827834 100644 (file)
@@ -184,7 +184,8 @@ int __init ima_free_kexec_buffer(void)
        if (ret)
                return ret;
 
-       return memblock_phys_free(addr, size);
+       memblock_free_late(addr, size);
+       return 0;
 }
 #endif
 
index 0c3475e..6a557eb 100644 (file)
@@ -141,7 +141,7 @@ struct platform_device *of_device_alloc(struct device_node *np,
        }
 
        /* setup generic device info */
-       device_set_node(&dev->dev, of_fwnode_handle(np));
+       device_set_node(&dev->dev, of_fwnode_handle(of_node_get(np)));
        dev->dev.parent = parent ? : &platform_bus;
 
        if (bus_id)
@@ -239,7 +239,7 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
        dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
 
        /* setup generic device info */
-       device_set_node(&dev->dev, of_fwnode_handle(node));
+       device_set_node(&dev->dev, of_fwnode_handle(of_node_get(node)));
        dev->dev.parent = parent ? : &platform_bus;
        dev->dev.platform_data = platform_data;
        if (bus_id)
index a406a12..b545fcb 100644 (file)
@@ -664,12 +664,12 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
        memset(&args, 0, sizeof(args));
 
        EXPECT_BEGIN(KERN_INFO,
-                    "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle");
+                    "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678");
 
        rc = of_parse_phandle_with_args_map(np, "phandle-list-bad-phandle",
                                            "phandle", 0, &args);
        EXPECT_END(KERN_INFO,
-                  "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle");
+                  "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678");
 
        unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 
index bf3405f..8b1dcd5 100644 (file)
@@ -121,6 +121,8 @@ module_param(sba_reserve_agpgart, int, 0444);
 MODULE_PARM_DESC(sba_reserve_agpgart, "Reserve half of IO pdir as AGPGART");
 #endif
 
+struct proc_dir_entry *proc_runway_root __ro_after_init;
+struct proc_dir_entry *proc_mckinley_root __ro_after_init;
 
 /************************************
 ** SBA register read and write support
@@ -1968,11 +1970,15 @@ static int __init sba_driver_callback(struct parisc_device *dev)
 #ifdef CONFIG_PROC_FS
        switch (dev->id.hversion) {
        case PLUTO_MCKINLEY_PORT:
+               if (!proc_mckinley_root)
+                       proc_mckinley_root = proc_mkdir("bus/mckinley", NULL);
                root = proc_mckinley_root;
                break;
        case ASTRO_RUNWAY_PORT:
        case IKE_MERCED_PORT:
        default:
+               if (!proc_runway_root)
+                       proc_runway_root = proc_mkdir("bus/runway", NULL);
                root = proc_runway_root;
                break;
        }
index 5bc81cc..46b252b 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/pci.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
+#include <linux/of.h>
 #include <linux/proc_fs.h>
 #include <linux/slab.h>
 
@@ -332,6 +333,7 @@ void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
  */
 void pci_bus_add_device(struct pci_dev *dev)
 {
+       struct device_node *dn = dev->dev.of_node;
        int retval;
 
        /*
@@ -344,7 +346,7 @@ void pci_bus_add_device(struct pci_dev *dev)
        pci_proc_attach_device(dev);
        pci_bridge_d3_update(dev);
 
-       dev->match_driver = true;
+       dev->match_driver = !dn || of_device_is_available(dn);
        retval = device_attach(&dev->dev);
        if (retval < 0 && retval != -EPROBE_DEFER)
                pci_warn(dev, "device attach failed (%d)\n", retval);
index 8d49bad..0859be8 100644 (file)
@@ -179,7 +179,6 @@ config PCI_MVEBU
        depends on MVEBU_MBUS
        depends on ARM
        depends on OF
-       depends on BROKEN
        select PCI_BRIDGE_EMUL
        help
         Add support for Marvell EBU PCIe controller. This PCIe controller
index cf61733..9952057 100644 (file)
@@ -485,20 +485,15 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
        if (ret)
                goto err_remove_edma;
 
-       if (dw_pcie_link_up(pci)) {
-               dw_pcie_print_link_status(pci);
-       } else {
+       if (!dw_pcie_link_up(pci)) {
                ret = dw_pcie_start_link(pci);
                if (ret)
                        goto err_remove_edma;
-
-               if (pci->ops && pci->ops->start_link) {
-                       ret = dw_pcie_wait_for_link(pci);
-                       if (ret)
-                               goto err_stop_link;
-               }
        }
 
+       /* Ignore errors, the link may come up later */
+       dw_pcie_wait_for_link(pci);
+
        bridge->sysdata = pp;
 
        ret = pci_host_probe(bridge);
index c87848c..1f2ee71 100644 (file)
@@ -644,20 +644,9 @@ void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index)
        dw_pcie_writel_atu(pci, dir, index, PCIE_ATU_REGION_CTRL2, 0);
 }
 
-void dw_pcie_print_link_status(struct dw_pcie *pci)
-{
-       u32 offset, val;
-
-       offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
-       val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
-
-       dev_info(pci->dev, "PCIe Gen.%u x%u link up\n",
-                FIELD_GET(PCI_EXP_LNKSTA_CLS, val),
-                FIELD_GET(PCI_EXP_LNKSTA_NLW, val));
-}
-
 int dw_pcie_wait_for_link(struct dw_pcie *pci)
 {
+       u32 offset, val;
        int retries;
 
        /* Check if the link is up or not */
@@ -673,7 +662,12 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci)
                return -ETIMEDOUT;
        }
 
-       dw_pcie_print_link_status(pci);
+       offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+       val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
+
+       dev_info(pci->dev, "PCIe Gen.%u x%u link up\n",
+                FIELD_GET(PCI_EXP_LNKSTA_CLS, val),
+                FIELD_GET(PCI_EXP_LNKSTA_NLW, val));
 
        return 0;
 }
index 6156606..79713ce 100644 (file)
@@ -429,7 +429,6 @@ void dw_pcie_setup(struct dw_pcie *pci);
 void dw_pcie_iatu_detect(struct dw_pcie *pci);
 int dw_pcie_edma_detect(struct dw_pcie *pci);
 void dw_pcie_edma_remove(struct dw_pcie *pci);
-void dw_pcie_print_link_status(struct dw_pcie *pci);
 
 static inline void dw_pcie_writel_dbi(struct dw_pcie *pci, u32 reg, u32 val)
 {
index 328d1e4..6011297 100644 (file)
@@ -498,6 +498,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
                                acpiphp_native_scan_bridge(dev);
                }
        } else {
+               LIST_HEAD(add_list);
                int max, pass;
 
                acpiphp_rescan_slot(slot);
@@ -511,10 +512,15 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
                                if (pass && dev->subordinate) {
                                        check_hotplug_bridge(slot, dev);
                                        pcibios_resource_survey_bus(dev->subordinate);
+                                       if (pci_is_root_bus(bus))
+                                               __pci_bus_size_bridges(dev->subordinate, &add_list);
                                }
                        }
                }
-               pci_assign_unassigned_bridge_resources(bus->self);
+               if (pci_is_root_bus(bus))
+                       __pci_bus_assign_resources(bus, &add_list, NULL);
+               else
+                       pci_assign_unassigned_bridge_resources(bus->self);
        }
 
        acpiphp_sanitize_bus(bus);
index e51219f..3c158b1 100644 (file)
@@ -34,11 +34,6 @@ int pci_set_of_node(struct pci_dev *dev)
        if (!node)
                return 0;
 
-       if (!of_device_is_available(node)) {
-               of_node_put(node);
-               return -ENODEV;
-       }
-
        device_set_node(&dev->dev, of_fwnode_handle(node));
        return 0;
 }
index 08b3a1b..6a3d817 100644 (file)
@@ -772,6 +772,8 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu)
 
        /* Enable all counters */
        armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
+
+       kvm_vcpu_pmu_resync_el0();
 }
 
 static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
index 4a8c1b5..4dff656 100644 (file)
@@ -862,6 +862,33 @@ static const struct pinconf_ops amd_pinconf_ops = {
        .pin_config_group_set = amd_pinconf_group_set,
 };
 
+static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
+{
+       struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
+       unsigned long flags;
+       u32 pin_reg, mask;
+       int i;
+
+       mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) |
+               BIT(WAKE_CNTRL_OFF_S4);
+
+       for (i = 0; i < desc->npins; i++) {
+               int pin = desc->pins[i].number;
+               const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin);
+
+               if (!pd)
+                       continue;
+
+               raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+
+               pin_reg = readl(gpio_dev->base + pin * 4);
+               pin_reg &= ~mask;
+               writel(pin_reg, gpio_dev->base + pin * 4);
+
+               raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+       }
+}
+
 #ifdef CONFIG_PM_SLEEP
 static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin)
 {
@@ -1099,6 +1126,9 @@ static int amd_gpio_probe(struct platform_device *pdev)
                return PTR_ERR(gpio_dev->pctrl);
        }
 
+       /* Disable and mask interrupts */
+       amd_gpio_irq_init(gpio_dev);
+
        girq = &gpio_dev->gc.irq;
        gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip);
        /* This will let us handle the parent IRQ in the driver */
index 2585ef2..115b83e 100644 (file)
@@ -1038,6 +1038,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
        struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
        const struct msm_pingroup *g;
+       u32 intr_target_mask = GENMASK(2, 0);
        unsigned long flags;
        bool was_enabled;
        u32 val;
@@ -1074,13 +1075,15 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
         * With intr_target_use_scm interrupts are routed to
         * application cpu using scm calls.
         */
+       if (g->intr_target_width)
+               intr_target_mask = GENMASK(g->intr_target_width - 1, 0);
+
        if (pctrl->intr_target_use_scm) {
                u32 addr = pctrl->phys_base[0] + g->intr_target_reg;
                int ret;
 
                qcom_scm_io_readl(addr, &val);
-
-               val &= ~(7 << g->intr_target_bit);
+               val &= ~(intr_target_mask << g->intr_target_bit);
                val |= g->intr_target_kpss_val << g->intr_target_bit;
 
                ret = qcom_scm_io_writel(addr, val);
@@ -1090,7 +1093,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
                                d->hwirq);
        } else {
                val = msm_readl_intr_target(pctrl, g);
-               val &= ~(7 << g->intr_target_bit);
+               val &= ~(intr_target_mask << g->intr_target_bit);
                val |= g->intr_target_kpss_val << g->intr_target_bit;
                msm_writel_intr_target(val, pctrl, g);
        }
index 5e4410b..1d2f2e9 100644 (file)
@@ -59,6 +59,7 @@ struct pinctrl_pin_desc;
  * @intr_status_bit:      Offset in @intr_status_reg for reading and acking the interrupt
  *                        status.
  * @intr_target_bit:      Offset in @intr_target_reg for configuring the interrupt routing.
+ * @intr_target_width:    Number of bits used for specifying interrupt routing target.
  * @intr_target_kpss_val: Value in @intr_target_bit for specifying that the interrupt from
  *                        this gpio should get routed to the KPSS processor.
  * @intr_raw_status_bit:  Offset in @intr_cfg_reg for the raw status bit.
@@ -100,6 +101,7 @@ struct msm_pingroup {
        unsigned intr_ack_high:1;
 
        unsigned intr_target_bit:5;
+       unsigned intr_target_width:5;
        unsigned intr_target_kpss_val:5;
        unsigned intr_raw_status_bit:5;
        unsigned intr_polarity_bit:5;
index 8a5cd15..8fdea25 100644 (file)
@@ -46,6 +46,7 @@
                .intr_enable_bit = 0,           \
                .intr_status_bit = 0,           \
                .intr_target_bit = 5,           \
+               .intr_target_width = 4,         \
                .intr_target_kpss_val = 3,      \
                .intr_raw_status_bit = 4,       \
                .intr_polarity_bit = 1,         \
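
The msm pinctrl change stops hard-coding a 3-bit routing mask (7 << bit) and derives it from the new per-group intr_target_width, set to 4 in the hunk above. An open-coded sketch of the mask arithmetic (GENMASK re-implemented here for a standalone build, register layout illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define GENMASK32(h, l)  (((~0u) << (l)) & (~0u >> (31 - (h))))

    static uint32_t set_target(uint32_t val, unsigned bit, unsigned width,
                               uint32_t target)
    {
            uint32_t mask = GENMASK32(width - 1, 0);

            val &= ~(mask << bit);          /* clear the full-width field */
            val |= (target & mask) << bit;  /* then set the routing target */
            return val;
    }

    int main(void)
    {
            /* a 4-bit field at bit 5; a 3-bit mask would truncate it */
            printf("0x%08x\n", set_target(0xffffffff, 5, 4, 0x3));
            return 0;
    }
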
index 40b1326..5591ddf 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/pinctrl/pinmux.h>
 
@@ -46,6 +47,7 @@ struct rza2_pinctrl_priv {
        struct pinctrl_dev *pctl;
        struct pinctrl_gpio_range gpio_range;
        int npins;
+       struct mutex mutex; /* serialize adding groups and functions */
 };
 
 #define RZA2_PDR(port)         (0x0000 + (port) * 2)   /* Direction 16-bit */
@@ -358,10 +360,14 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
                psel_val[i] = MUX_FUNC(value);
        }
 
+       mutex_lock(&priv->mutex);
+
        /* Register a single pin group listing all the pins we read from DT */
        gsel = pinctrl_generic_add_group(pctldev, np->name, pins, npins, NULL);
-       if (gsel < 0)
-               return gsel;
+       if (gsel < 0) {
+               ret = gsel;
+               goto unlock;
+       }
 
        /*
         * Register a single group function where the 'data' is an array PSEL
@@ -390,6 +396,8 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
        (*map)->data.mux.function = np->name;
        *num_maps = 1;
 
+       mutex_unlock(&priv->mutex);
+
        return 0;
 
 remove_function:
@@ -398,6 +406,9 @@ remove_function:
 remove_group:
        pinctrl_generic_remove_group(pctldev, gsel);
 
+unlock:
+       mutex_unlock(&priv->mutex);
+
        dev_err(priv->dev, "Unable to parse DT node %s\n", np->name);
 
        return ret;
@@ -473,6 +484,8 @@ static int rza2_pinctrl_probe(struct platform_device *pdev)
        if (IS_ERR(priv->base))
                return PTR_ERR(priv->base);
 
+       mutex_init(&priv->mutex);
+
        platform_set_drvdata(pdev, priv);
 
        priv->npins = (int)(uintptr_t)of_device_get_match_data(&pdev->dev) *
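
The same fix is applied to the two Renesas drivers that follow: pinctrl_generic_add_group() and the matching function registration now happen under a driver mutex, so concurrent dt_node_to_map() calls cannot interleave group and function creation. A pthread sketch of serialising such a two-step registration (names illustrative):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t reg_lock = PTHREAD_MUTEX_INITIALIZER;
    static int ngroups, nfuncs;

    static int add_group(void)    { return ngroups++; }
    static int add_function(void) { return nfuncs++; }

    /* group and function must be created as one unit */
    static int register_mapping(void)
    {
            int gsel, fsel;

            pthread_mutex_lock(&reg_lock);
            gsel = add_group();
            fsel = add_function();
            pthread_mutex_unlock(&reg_lock);

            return gsel == fsel ? 0 : -1;   /* indices stay paired */
    }

    int main(void)
    {
            printf("%d %d\n", register_mapping(), register_mapping());
            return 0;
    }
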
index b53d261..6e8a765 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/seq_file.h>
@@ -149,10 +150,11 @@ struct rzg2l_pinctrl {
        struct gpio_chip                gpio_chip;
        struct pinctrl_gpio_range       gpio_range;
        DECLARE_BITMAP(tint_slot, RZG2L_TINT_MAX_INTERRUPT);
-       spinlock_t                      bitmap_lock;
+       spinlock_t                      bitmap_lock; /* protect tint_slot bitmap */
        unsigned int                    hwirq[RZG2L_TINT_MAX_INTERRUPT];
 
-       spinlock_t                      lock;
+       spinlock_t                      lock; /* lock read/write registers */
+       struct mutex                    mutex; /* serialize adding groups and functions */
 };
 
 static const unsigned int iolh_groupa_mA[] = { 2, 4, 8, 12 };
@@ -362,11 +364,13 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
                name = np->name;
        }
 
+       mutex_lock(&pctrl->mutex);
+
        /* Register a single pin group listing all the pins we read from DT */
        gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
        if (gsel < 0) {
                ret = gsel;
-               goto done;
+               goto unlock;
        }
 
        /*
@@ -380,6 +384,8 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
                goto remove_group;
        }
 
+       mutex_unlock(&pctrl->mutex);
+
        maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
        maps[idx].data.mux.group = name;
        maps[idx].data.mux.function = name;
@@ -391,6 +397,8 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 
 remove_group:
        pinctrl_generic_remove_group(pctldev, gsel);
+unlock:
+       mutex_unlock(&pctrl->mutex);
 done:
        *index = idx;
        kfree(configs);
@@ -1509,6 +1517,7 @@ static int rzg2l_pinctrl_probe(struct platform_device *pdev)
 
        spin_lock_init(&pctrl->lock);
        spin_lock_init(&pctrl->bitmap_lock);
+       mutex_init(&pctrl->mutex);
 
        platform_set_drvdata(pdev, pctrl);
 
index 35b23c1..9146101 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/spinlock.h>
 
@@ -123,7 +124,8 @@ struct rzv2m_pinctrl {
        struct gpio_chip                gpio_chip;
        struct pinctrl_gpio_range       gpio_range;
 
-       spinlock_t                      lock;
+       spinlock_t                      lock; /* lock read/write registers */
+       struct mutex                    mutex; /* serialize adding groups and functions */
 };
 
 static const unsigned int drv_1_8V_group2_uA[] = { 1800, 3800, 7800, 11000 };
@@ -322,11 +324,13 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
                name = np->name;
        }
 
+       mutex_lock(&pctrl->mutex);
+
        /* Register a single pin group listing all the pins we read from DT */
        gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
        if (gsel < 0) {
                ret = gsel;
-               goto done;
+               goto unlock;
        }
 
        /*
@@ -340,6 +344,8 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
                goto remove_group;
        }
 
+       mutex_unlock(&pctrl->mutex);
+
        maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
        maps[idx].data.mux.group = name;
        maps[idx].data.mux.function = name;
@@ -351,6 +357,8 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 
 remove_group:
        pinctrl_generic_remove_group(pctldev, gsel);
+unlock:
+       mutex_unlock(&pctrl->mutex);
 done:
        *index = idx;
        kfree(configs);
@@ -1071,6 +1079,7 @@ static int rzv2m_pinctrl_probe(struct platform_device *pdev)
        }
 
        spin_lock_init(&pctrl->lock);
+       mutex_init(&pctrl->mutex);
 
        platform_set_drvdata(pdev, pctrl);
 
index a79318e..b600b77 100644 (file)
@@ -887,6 +887,7 @@ static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
                        tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
                        mlxbf_tmfifo_console_output(tm_vdev, vring);
                        spin_unlock_irqrestore(&fifo->spin_lock[0], flags);
+                       set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
                } else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
                                            &fifo->pend_events)) {
                        return true;
index ab69d51..a70e677 100644 (file)
@@ -176,7 +176,8 @@ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf)
 
 int amd_pmf_power_slider_update_event(struct amd_pmf_dev *dev)
 {
-       u8 mode, flag = 0;
+       u8 flag = 0;
+       int mode;
        int src;
 
        mode = amd_pmf_get_pprof_modes(dev);
index d2fee9a..6d9297c 100644 (file)
@@ -1049,6 +1049,11 @@ static const struct key_entry ideapad_keymap[] = {
        { KE_IGNORE,    0x03 | IDEAPAD_WMI_KEY },
        /* Customizable Lenovo Hotkey ("star" with 'S' inside) */
        { KE_KEY,       0x01 | IDEAPAD_WMI_KEY, { KEY_FAVORITES } },
+       { KE_KEY,       0x04 | IDEAPAD_WMI_KEY, { KEY_SELECTIVE_SCREENSHOT } },
+       /* Lenovo Support */
+       { KE_KEY,       0x07 | IDEAPAD_WMI_KEY, { KEY_HELP } },
+       { KE_KEY,       0x0e | IDEAPAD_WMI_KEY, { KEY_PICKUP_PHONE } },
+       { KE_KEY,       0x0f | IDEAPAD_WMI_KEY, { KEY_HANGUP_PHONE } },
        /* Dark mode toggle */
        { KE_KEY,       0x13 | IDEAPAD_WMI_KEY, { KEY_PROG1 } },
        /* Sound profile switch */
index 1f59ac5..a95004e 100644 (file)
@@ -335,8 +335,8 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn
 
                node = dev_to_node(&_pci_dev->dev);
                if (node == NUMA_NO_NODE) {
-                       pr_info("Fail to get numa node for CPU:%d bus:%d dev:%d fn:%d\n",
-                               cpu, bus_no, dev, fn);
+                       pr_info_once("Fail to get numa node for CPU:%d bus:%d dev:%d fn:%d\n",
+                                    cpu, bus_no, dev, fn);
                        continue;
                }
 
index 4167618..e1fbc35 100644 (file)
@@ -24,6 +24,10 @@ static bool ec_trigger __read_mostly;
 module_param(ec_trigger, bool, 0444);
 MODULE_PARM_DESC(ec_trigger, "Enable EC triggering work-around to force emitting tablet mode events");
 
+static bool force;
+module_param(force, bool, 0444);
+MODULE_PARM_DESC(force, "Force loading on boards without a convertible DMI chassis-type");
+
 static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = {
        {
                /* Lenovo Yoga 7 14ARB7 */
@@ -32,6 +36,27 @@ static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "82QF"),
                },
        },
+       {
+               /* Lenovo Yoga 7 14ACN6 */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "82N7"),
+               },
+       },
+       { }
+};
+
+static const struct dmi_system_id allowed_chasis_types_dmi_table[] = {
+       {
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "31" /* Convertible */),
+               },
+       },
+       {
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "32" /* Detachable */),
+               },
+       },
        { }
 };
 
@@ -111,6 +136,13 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx)
        struct input_dev *input_dev;
        int err;
 
+       if (!dmi_check_system(allowed_chasis_types_dmi_table)) {
+               if (force)
+                       dev_info(&wdev->dev, "Force loading Lenovo YMC support\n");
+               else
+                       return -ENODEV;
+       }
+
        ec_trigger |= dmi_check_system(ec_trigger_quirk_dmi_table);
 
        priv = devm_kzalloc(&wdev->dev, sizeof(*priv), GFP_KERNEL);
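
The lenovo-ymc probe now refuses to bind unless DMI reports a convertible (chassis type 31) or detachable (type 32), with the force module parameter as an escape hatch for boards with wrong DMI data. A condensed sketch of that gate (strings and return codes illustrative):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    static bool force;      /* stands in for the module parameter */

    static int probe_sketch(const char *chassis_type)
    {
            if (strcmp(chassis_type, "31") != 0 &&
                strcmp(chassis_type, "32") != 0) {
                    if (!force)
                            return -19;     /* roughly -ENODEV: wrong chassis */
                    puts("force loading despite chassis type");
            }
            return 0;
    }

    int main(void)
    {
            printf("%d\n", probe_sketch("10"));     /* notebook: refuse */
            force = true;
            printf("%d\n", probe_sketch("10"));     /* forced: proceed */
            return 0;
    }
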
index 67367f0..7d33977 100644 (file)
 #define MLXPLAT_CPLD_LPC_REG_PWM_CONTROL_OFFSET        0x37
 #define MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET       0x3a
 #define MLXPLAT_CPLD_LPC_REG_AGGR_MASK_OFFSET  0x3b
-#define MLXPLAT_CPLD_LPC_REG_DBG1_OFFSET       0x3c
-#define MLXPLAT_CPLD_LPC_REG_DBG2_OFFSET       0x3d
-#define MLXPLAT_CPLD_LPC_REG_DBG3_OFFSET       0x3e
-#define MLXPLAT_CPLD_LPC_REG_DBG4_OFFSET       0x3f
 #define MLXPLAT_CPLD_LPC_REG_AGGRLO_OFFSET     0x40
 #define MLXPLAT_CPLD_LPC_REG_AGGRLO_MASK_OFFSET        0x41
 #define MLXPLAT_CPLD_LPC_REG_AGGRCO_OFFSET     0x42
 #define MLXPLAT_CPLD_LPC_REG_LC_SD_EVENT_OFFSET        0xaa
 #define MLXPLAT_CPLD_LPC_REG_LC_SD_MASK_OFFSET 0xab
 #define MLXPLAT_CPLD_LPC_REG_LC_PWR_ON         0xb2
+#define MLXPLAT_CPLD_LPC_REG_DBG1_OFFSET       0xb6
+#define MLXPLAT_CPLD_LPC_REG_DBG2_OFFSET       0xb7
+#define MLXPLAT_CPLD_LPC_REG_DBG3_OFFSET       0xb8
+#define MLXPLAT_CPLD_LPC_REG_DBG4_OFFSET       0xb9
 #define MLXPLAT_CPLD_LPC_REG_GP4_RO_OFFSET     0xc2
 #define MLXPLAT_CPLD_LPC_REG_SPI_CHNL_SELECT   0xc3
 #define MLXPLAT_CPLD_LPC_REG_WD_CLEAR_OFFSET   0xc7
                                         MLXPLAT_CPLD_AGGR_MASK_LC_SDWN)
 #define MLXPLAT_CPLD_LOW_AGGR_MASK_LOW 0xc1
 #define MLXPLAT_CPLD_LOW_AGGR_MASK_ASIC2       BIT(2)
-#define MLXPLAT_CPLD_LOW_AGGR_MASK_PWR_BUT     BIT(4)
+#define MLXPLAT_CPLD_LOW_AGGR_MASK_PWR_BUT     GENMASK(5, 4)
 #define MLXPLAT_CPLD_LOW_AGGR_MASK_I2C BIT(6)
 #define MLXPLAT_CPLD_PSU_MASK          GENMASK(1, 0)
 #define MLXPLAT_CPLD_PWR_MASK          GENMASK(1, 0)
 #define MLXPLAT_CPLD_GWP_MASK          GENMASK(0, 0)
 #define MLXPLAT_CPLD_EROT_MASK         GENMASK(1, 0)
 #define MLXPLAT_CPLD_PWR_BUTTON_MASK   BIT(0)
-#define MLXPLAT_CPLD_LATCH_RST_MASK    BIT(5)
+#define MLXPLAT_CPLD_LATCH_RST_MASK    BIT(6)
 #define MLXPLAT_CPLD_THERMAL1_PDB_MASK BIT(3)
 #define MLXPLAT_CPLD_THERMAL2_PDB_MASK BIT(4)
 #define MLXPLAT_CPLD_INTRUSION_MASK    BIT(6)
@@ -2356,7 +2356,7 @@ mlxplat_mlxcpld_l1_switch_pwr_events_handler(void *handle, enum mlxreg_hotplug_k
                                             u8 action)
 {
        dev_info(&mlxplat_dev->dev, "System shutdown due to short press of power button");
-       kernel_halt();
+       kernel_power_off();
        return 0;
 }
 
@@ -2475,7 +2475,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_l1_switch_events_items[] = {
                .reg = MLXPLAT_CPLD_LPC_REG_PWRB_OFFSET,
                .mask = MLXPLAT_CPLD_PWR_BUTTON_MASK,
                .count = ARRAY_SIZE(mlxplat_mlxcpld_l1_switch_pwr_events_items_data),
-               .inversed = 0,
+               .inversed = 1,
                .health = false,
        },
        {
@@ -2484,7 +2484,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_l1_switch_events_items[] = {
                .reg = MLXPLAT_CPLD_LPC_REG_BRD_OFFSET,
                .mask = MLXPLAT_CPLD_L1_CHA_HEALTH_MASK,
                .count = ARRAY_SIZE(mlxplat_mlxcpld_l1_switch_health_events_items_data),
-               .inversed = 0,
+               .inversed = 1,
                .health = false,
                .ind = 8,
        },
@@ -3677,7 +3677,7 @@ static struct mlxreg_core_data mlxplat_mlxcpld_default_ng_regs_io_data[] = {
        {
                .label = "latch_reset",
                .reg = MLXPLAT_CPLD_LPC_REG_GP1_OFFSET,
-               .mask = GENMASK(7, 0) & ~BIT(5),
+               .mask = GENMASK(7, 0) & ~BIT(6),
                .mode = 0200,
        },
        {
@@ -6238,8 +6238,6 @@ static void mlxplat_i2c_mux_topolgy_exit(struct mlxplat_priv *priv)
                if (priv->pdev_mux[i])
                        platform_device_unregister(priv->pdev_mux[i]);
        }
-
-       mlxplat_post_exit();
 }
 
 static int mlxplat_i2c_main_complition_notify(void *handle, int id)
@@ -6369,6 +6367,7 @@ static void __exit mlxplat_exit(void)
                pm_power_off = NULL;
        mlxplat_pre_exit(priv);
        mlxplat_i2c_main_exit(priv);
+       mlxplat_post_exit();
 }
 module_exit(mlxplat_exit);
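Two behavioural notes on the mlx-platform hunks above: a short power-button press now calls kernel_power_off(), which actually powers the machine down, rather than kernel_halt(), which only stops the kernel; and mlxplat_post_exit() moves out of the i2c-mux teardown into the module exit path so cleanup runs strictly in reverse order of initialization. A sketch of that reverse-order teardown follows, with hypothetical demo_* steps standing in for the real ones.

#include <linux/module.h>

/* Hypothetical setup/teardown helpers, for illustration only. */
int demo_register_platform(void);
void demo_unregister_platform(void);
int demo_init_i2c_main(void);
void demo_exit_i2c_main(void);

static int __init demo_init(void)
{
        int err;

        err = demo_register_platform();         /* step 1 */
        if (err)
                return err;

        err = demo_init_i2c_main();             /* step 2 */
        if (err) {
                demo_unregister_platform();
                return err;
        }
        return 0;
}

static void __exit demo_exit(void)
{
        demo_exit_i2c_main();                   /* undo step 2 first */
        demo_unregister_platform();             /* then undo step 1 */
}
module_init(demo_init);
module_exit(demo_exit);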
 
index ff93986..f26a312 100644 (file)
 #include <linux/seq_file.h>
 #include <linux/string.h>
 
-static const char *const SM_ECO_NAME       = "eco";
-static const char *const SM_COMFORT_NAME   = "comfort";
-static const char *const SM_SPORT_NAME     = "sport";
-static const char *const SM_TURBO_NAME     = "turbo";
-
-static const char *const FM_AUTO_NAME     = "auto";
-static const char *const FM_SILENT_NAME   = "silent";
-static const char *const FM_BASIC_NAME    = "basic";
-static const char *const FM_ADVANCED_NAME = "advanced";
+#define SM_ECO_NAME            "eco"
+#define SM_COMFORT_NAME                "comfort"
+#define SM_SPORT_NAME          "sport"
+#define SM_TURBO_NAME          "turbo"
+
+#define FM_AUTO_NAME           "auto"
+#define FM_SILENT_NAME         "silent"
+#define FM_BASIC_NAME          "basic"
+#define FM_ADVANCED_NAME       "advanced"
 
 static const char * const ALLOWED_FW_0[] __initconst = {
        "14C1EMS1.012",
index 2c2abf6..8158e3c 100644 (file)
@@ -329,6 +329,19 @@ static const struct smi_node cs35l41_hda = {
        .bus_type = SMI_AUTO_DETECT,
 };
 
+static const struct smi_node cs35l56_hda = {
+       .instances = {
+               { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+               { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+               { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+               { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+               /* a 5th entry is an alias address, not a real device */
+               { "cs35l56-hda_dummy_dev" },
+               {}
+       },
+       .bus_type = SMI_AUTO_DETECT,
+};
+
 /*
  * Note new device-ids must also be added to ignore_serial_bus_ids in
  * drivers/acpi/scan.c: acpi_device_enumeration_by_parent().
@@ -337,6 +350,7 @@ static const struct acpi_device_id smi_acpi_ids[] = {
        { "BSG1160", (unsigned long)&bsg1160_data },
        { "BSG2150", (unsigned long)&bsg2150_data },
        { "CSC3551", (unsigned long)&cs35l41_hda },
+       { "CSC3556", (unsigned long)&cs35l56_hda },
        { "INT3515", (unsigned long)&int3515_data },
        /* Non-conforming _HID for Cirrus Logic already released */
        { "CLSA0100", (unsigned long)&cs35l41_hda },
index dfd5ec9..a062166 100644 (file)
@@ -778,9 +778,6 @@ static int da9063_check_xvp_constraints(struct regulator_config *config)
        const struct notification_limit *uv_l = &constr->under_voltage_limits;
        const struct notification_limit *ov_l = &constr->over_voltage_limits;
 
-       if (!config->init_data) /* No config in DT, pointers will be invalid */
-               return 0;
-
        /* make sure that only one severity is used to clarify if unchanged, enabled or disabled */
        if ((!!uv_l->prot + !!uv_l->err + !!uv_l->warn) > 1) {
                dev_err(config->dev, "%s: at most one voltage monitoring severity allowed!\n",
@@ -1031,9 +1028,12 @@ static int da9063_regulator_probe(struct platform_device *pdev)
                        config.of_node = da9063_reg_matches[id].of_node;
                config.regmap = da9063->regmap;
 
-               ret = da9063_check_xvp_constraints(&config);
-               if (ret)
-                       return ret;
+               /* Checking constraints requires init_data from DT. */
+               if (config.init_data) {
+                       ret = da9063_check_xvp_constraints(&config);
+                       if (ret)
+                               return ret;
+               }
 
                regl->rdev = devm_regulator_register(&pdev->dev, &regl->desc,
                                                     &config);
index f3b280a..cd077b7 100644 (file)
@@ -1068,7 +1068,7 @@ static const struct rpmh_vreg_init_data pm8550_vreg_data[] = {
        RPMH_VREG("ldo9",   "ldo%s9",  &pmic5_pldo,    "vdd-l8-l9"),
        RPMH_VREG("ldo10",  "ldo%s10", &pmic5_nldo515,    "vdd-l1-l4-l10"),
        RPMH_VREG("ldo11",  "ldo%s11", &pmic5_nldo515,    "vdd-l11"),
-       RPMH_VREG("ldo12",  "ldo%s12", &pmic5_pldo,    "vdd-l12"),
+       RPMH_VREG("ldo12",  "ldo%s12", &pmic5_nldo515,    "vdd-l12"),
        RPMH_VREG("ldo13",  "ldo%s13", &pmic5_pldo,    "vdd-l2-l13-l14"),
        RPMH_VREG("ldo14",  "ldo%s14", &pmic5_pldo,    "vdd-l2-l13-l14"),
        RPMH_VREG("ldo15",  "ldo%s15", &pmic5_nldo515,    "vdd-l15"),
index b441745..0509f80 100644 (file)
 #define AP_QUEUE_UNASSIGNED "unassigned"
 #define AP_QUEUE_IN_USE "in use"
 
-#define MAX_RESET_CHECK_WAIT   200     /* Sleep max 200ms for reset check      */
 #define AP_RESET_INTERVAL              20      /* Reset sleep interval (20ms)          */
 
 static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
 static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
-static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
+static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
 
 /**
  * get_update_locks_for_kvm: Acquire the locks required to dynamically update a
@@ -360,6 +359,28 @@ static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib)
        return 0;
 }
 
+static int ensure_nib_shared(unsigned long addr, struct gmap *gmap)
+{
+       int ret;
+
+       /*
+        * The nib has to be located in shared storage since guest and
+        * host access it. vfio_pin_pages() will do a pin shared and
+        * if that fails (possibly because it's not a shared page) it
+        * calls export. We try to do a second pin shared here so that
+        * the UV gives us an error code if we try to pin a non-shared
+        * page.
+        *
+        * If the page is already pinned shared the UV will return a success.
+        */
+       ret = uv_pin_shared(addr);
+       if (ret) {
+               /* vfio_pin_pages() likely exported the page so let's re-import */
+               gmap_convert_to_secure(gmap, addr);
+       }
+       return ret;
+}
+
 /**
  * vfio_ap_irq_enable - Enable Interruption for a APQN
  *
@@ -423,6 +444,14 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
        h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK);
        aqic_gisa.gisc = isc;
 
+       /* NIB in non-shared storage is a rc 6 for PV guests */
+       if (kvm_s390_pv_cpu_is_protected(vcpu) &&
+           ensure_nib_shared(h_nib & PAGE_MASK, kvm->arch.gmap)) {
+               vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1);
+               status.response_code = AP_RESPONSE_INVALID_ADDRESS;
+               return status;
+       }
+
        nisc = kvm_s390_gisc_register(kvm, isc);
        if (nisc < 0) {
                VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
@@ -675,7 +704,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
                         */
                        apqn = AP_MKQID(apid, apqi);
                        q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
-                       if (!q || q->reset_rc) {
+                       if (!q || q->reset_status.response_code) {
                                clear_bit_inv(apid,
                                              matrix_mdev->shadow_apcb.apm);
                                break;
@@ -1608,19 +1637,21 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
 {
        switch (status->response_code) {
        case AP_RESPONSE_NORMAL:
+       case AP_RESPONSE_DECONFIGURED:
+               return 0;
        case AP_RESPONSE_RESET_IN_PROGRESS:
-               if (status->queue_empty && !status->irq_enabled)
-                       return 0;
+       case AP_RESPONSE_BUSY:
                return -EBUSY;
-       case AP_RESPONSE_DECONFIGURED:
+       case AP_RESPONSE_ASSOC_SECRET_NOT_UNIQUE:
+       case AP_RESPONSE_ASSOC_FAILED:
                /*
-                * If the AP queue is deconfigured, any subsequent AP command
-                * targeting the queue will fail with the same response code. On the
-                * other hand, when an AP adapter is deconfigured, the associated
-                * queues are reset, so let's return a value indicating the reset
-                * for which we're waiting completed successfully.
+                * These asynchronous response codes indicate a PQAP(AAPQ)
+                * instruction to associate a secret with the guest failed. All
+                * subsequent AP instructions will end with the asynchronous
+                * response code until the AP queue is reset; so, let's return
+                * a value indicating a reset needs to be performed again.
                 */
-               return 0;
+               return -EAGAIN;
        default:
                WARN(true,
                     "failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n",
@@ -1630,91 +1661,105 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
        }
 }
 
-static int apq_reset_check(struct vfio_ap_queue *q)
+#define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)"
+
+static void apq_reset_check(struct work_struct *reset_work)
 {
-       int ret;
-       int iters = MAX_RESET_CHECK_WAIT / AP_RESET_INTERVAL;
+       int ret = -EBUSY, elapsed = 0;
        struct ap_queue_status status;
+       struct vfio_ap_queue *q;
 
-       for (; iters > 0; iters--) {
+       q = container_of(reset_work, struct vfio_ap_queue, reset_work);
+       memcpy(&status, &q->reset_status, sizeof(status));
+       while (true) {
                msleep(AP_RESET_INTERVAL);
+               elapsed += AP_RESET_INTERVAL;
                status = ap_tapq(q->apqn, NULL);
                ret = apq_status_check(q->apqn, &status);
-               if (ret != -EBUSY)
-                       return ret;
+               if (ret == -EIO)
+                       return;
+               if (ret == -EBUSY) {
+                       pr_notice_ratelimited(WAIT_MSG, elapsed,
+                                             AP_QID_CARD(q->apqn),
+                                             AP_QID_QUEUE(q->apqn),
+                                             status.response_code,
+                                             status.queue_empty,
+                                             status.irq_enabled);
+               } else {
+                       if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS ||
+                           q->reset_status.response_code == AP_RESPONSE_BUSY ||
+                           q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS ||
+                           ret == -EAGAIN) {
+                               status = ap_zapq(q->apqn, 0);
+                               memcpy(&q->reset_status, &status, sizeof(status));
+                               continue;
+                       }
+                       /*
+                        * When an AP adapter is deconfigured, the
+                        * associated queues are reset, so let's set the
+                        * status response code to 0 so the queue may be
+                        * passed through (i.e., not filtered)
+                        */
+                       if (status.response_code == AP_RESPONSE_DECONFIGURED)
+                               q->reset_status.response_code = 0;
+                       if (q->saved_isc != VFIO_AP_ISC_INVALID)
+                               vfio_ap_free_aqic_resources(q);
+                       break;
+               }
        }
-       WARN_ONCE(iters <= 0,
-                 "timeout verifying reset of queue %02x.%04x (%u, %u, %u)",
-                 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
-                 status.queue_empty, status.irq_enabled, status.response_code);
-       return ret;
 }
 
-static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
+static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
 {
        struct ap_queue_status status;
-       int ret;
 
        if (!q)
-               return 0;
-retry_zapq:
+               return;
        status = ap_zapq(q->apqn, 0);
-       q->reset_rc = status.response_code;
+       memcpy(&q->reset_status, &status, sizeof(status));
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
-               ret = 0;
-               /* if the reset has not completed, wait for it to take effect */
-               if (!status.queue_empty || status.irq_enabled)
-                       ret = apq_reset_check(q);
-               break;
        case AP_RESPONSE_RESET_IN_PROGRESS:
+       case AP_RESPONSE_BUSY:
+       case AP_RESPONSE_STATE_CHANGE_IN_PROGRESS:
                /*
-                * There is a reset issued by another process in progress. Let's wait
-                * for that to complete. Since we have no idea whether it was a RAPQ or
-                * ZAPQ, then if it completes successfully, let's issue the ZAPQ.
+                * Let's verify whether the ZAPQ completed successfully on a work queue.
                 */
-               ret = apq_reset_check(q);
-               if (ret)
-                       break;
-               goto retry_zapq;
+               queue_work(system_long_wq, &q->reset_work);
+               break;
        case AP_RESPONSE_DECONFIGURED:
                /*
                 * When an AP adapter is deconfigured, the associated
-                * queues are reset, so let's return a value indicating the reset
-                * completed successfully.
+                * queues are reset, so let's set the status response code to 0
+                * so the queue may be passed through (i.e., not filtered).
                 */
-               ret = 0;
+               q->reset_status.response_code = 0;
+               vfio_ap_free_aqic_resources(q);
                break;
        default:
                WARN(true,
                     "PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n",
                     AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
                     status.response_code);
-               return -EIO;
        }
-
-       vfio_ap_free_aqic_resources(q);
-
-       return ret;
 }
 
 static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
 {
-       int ret, loop_cursor, rc = 0;
+       int ret = 0, loop_cursor;
        struct vfio_ap_queue *q;
 
+       hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
+               vfio_ap_mdev_reset_queue(q);
+
        hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
-               ret = vfio_ap_mdev_reset_queue(q);
-               /*
-                * Regardless whether a queue turns out to be busy, or
-                * is not operational, we need to continue resetting
-                * the remaining queues.
-                */
-               if (ret)
-                       rc = ret;
+               flush_work(&q->reset_work);
+
+               if (q->reset_status.response_code)
+                       ret = -EIO;
        }
 
-       return rc;
+       return ret;
 }
 
 static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
@@ -2038,6 +2083,8 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
 
        q->apqn = to_ap_queue(&apdev->device)->qid;
        q->saved_isc = VFIO_AP_ISC_INVALID;
+       memset(&q->reset_status, 0, sizeof(q->reset_status));
+       INIT_WORK(&q->reset_work, apq_reset_check);
        matrix_mdev = get_update_locks_by_apqn(q->apqn);
 
        if (matrix_mdev) {
@@ -2087,6 +2134,7 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
        }
 
        vfio_ap_mdev_reset_queue(q);
+       flush_work(&q->reset_work);
        dev_set_drvdata(&apdev->device, NULL);
        kfree(q);
        release_update_locks_for_mdev(matrix_mdev);
index 4642bbd..88aff8b 100644 (file)
@@ -133,7 +133,8 @@ struct ap_matrix_mdev {
  * @apqn: the APQN of the AP queue device
  * @saved_isc: the guest ISC registered with the GIB interface
  * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
- * @reset_rc: the status response code from the last reset of the queue
+ * @reset_status: the status from the last reset of the queue
+ * @reset_work: work to wait for queue reset to complete
  */
 struct vfio_ap_queue {
        struct ap_matrix_mdev *matrix_mdev;
@@ -142,7 +143,8 @@ struct vfio_ap_queue {
 #define VFIO_AP_ISC_INVALID 0xff
        unsigned char saved_isc;
        struct hlist_node mdev_qnode;
-       unsigned int reset_rc;
+       struct ap_queue_status reset_status;
+       struct work_struct reset_work;
 };
 
 int vfio_ap_mdev_register(void);
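The vfio_ap rework above drops the bounded, synchronous polling loop: ZAPQ verification now runs on system_long_wq, the outcome is kept in the queue's reset_status, and callers that need a completed reset flush the work item. Below is a minimal sketch of that queue-and-flush pattern; the demo_* helpers are hypothetical stand-ins for the real TAPQ/ZAPQ calls.

#include <linux/delay.h>
#include <linux/workqueue.h>

struct demo_queue {
        int apqn;
        int reset_rc;                   /* result of the last verification */
        struct work_struct reset_work;
};

static bool demo_queue_still_busy(int apqn);    /* hypothetical */
static void demo_issue_reset(int apqn);         /* hypothetical */

static void demo_reset_check(struct work_struct *work)
{
        struct demo_queue *q = container_of(work, struct demo_queue, reset_work);

        while (demo_queue_still_busy(q->apqn))
                msleep(20);
        q->reset_rc = 0;
}

static void demo_queue_init(struct demo_queue *q, int apqn)
{
        q->apqn = apqn;
        INIT_WORK(&q->reset_work, demo_reset_check);
}

static void demo_reset_queue_async(struct demo_queue *q)
{
        demo_issue_reset(q->apqn);
        queue_work(system_long_wq, &q->reset_work);
}

static int demo_reset_queue_sync(struct demo_queue *q)
{
        demo_reset_queue_async(q);
        flush_work(&q->reset_work);     /* block until verification finishes */
        return q->reset_rc;
}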
index e1e4f9d..857be0f 100644 (file)
@@ -1598,7 +1598,7 @@ NCR_700_intr(int irq, void *dev_id)
                                printk("scsi%d (%d:%d) PHASE MISMATCH IN SEND MESSAGE %d remain, return %p[%04x], phase %s\n", host->host_no, pun, lun, count, (void *)temp, temp - hostdata->pScript, sbcl_to_string(NCR_700_readb(host, SBCL_REG)));
 #endif
                                resume_offset = hostdata->pScript + Ent_SendMessagePhaseMismatch;
-                       } else if(dsp >= to32bit(&slot->pSG[0].ins) &&
+                       } else if (slot && dsp >= to32bit(&slot->pSG[0].ins) &&
                                  dsp <= to32bit(&slot->pSG[NCR_700_SG_SEGMENTS].ins)) {
                                int data_transfer = NCR_700_readl(host, DBC_REG) & 0xffffff;
                                int SGcount = (dsp - to32bit(&slot->pSG[0].ins))/sizeof(struct NCR_700_SG_List);
index d82de34..e51e92f 100644 (file)
@@ -27,7 +27,7 @@
 
 #define DRV_NAME               "fnic"
 #define DRV_DESCRIPTION                "Cisco FCoE HBA Driver"
-#define DRV_VERSION            "1.6.0.54"
+#define DRV_VERSION            "1.6.0.55"
 #define PFX                    DRV_NAME ": "
 #define DFX                     DRV_NAME "%d: "
 
index 26dbd34..be89ce9 100644 (file)
@@ -2139,7 +2139,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
                                     bool new_sc)
 
 {
-       int ret = SUCCESS;
+       int ret = 0;
        struct fnic_pending_aborts_iter_data iter_data = {
                .fnic = fnic,
                .lun_dev = lr_sc->device,
@@ -2159,9 +2159,11 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
 
        /* walk again to check, if IOs are still pending in fw */
        if (fnic_is_abts_pending(fnic, lr_sc))
-               ret = FAILED;
+               ret = 1;
 
 clean_pending_aborts_end:
+       FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+                       "%s: exit status: %d\n", __func__, ret);
        return ret;
 }
 
index a62e091..d26941b 100644 (file)
@@ -109,8 +109,6 @@ lpfc_sli4_set_rsp_sgl_last(struct lpfc_hba *phba,
        }
 }
 
-#define LPFC_INVALID_REFTAG ((u32)-1)
-
 /**
  * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread
  * @phba: The Hba for which this call is being executed.
@@ -978,8 +976,6 @@ lpfc_bg_err_inject(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 
        sgpe = scsi_prot_sglist(sc);
        lba = scsi_prot_ref_tag(sc);
-       if (lba == LPFC_INVALID_REFTAG)
-               return 0;
 
        /* First check if we need to match the LBA */
        if (phba->lpfc_injerr_lba != LPFC_INJERR_LBA_OFF) {
@@ -1560,8 +1556,6 @@ lpfc_bg_setup_bpl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 
        /* extract some info from the scsi command for pde*/
        reftag = scsi_prot_ref_tag(sc);
-       if (reftag == LPFC_INVALID_REFTAG)
-               goto out;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
        rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -1723,8 +1717,6 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
        /* extract some info from the scsi command */
        blksize = scsi_prot_interval(sc);
        reftag = scsi_prot_ref_tag(sc);
-       if (reftag == LPFC_INVALID_REFTAG)
-               goto out;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
        rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -1953,8 +1945,6 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 
        /* extract some info from the scsi command for pde*/
        reftag = scsi_prot_ref_tag(sc);
-       if (reftag == LPFC_INVALID_REFTAG)
-               goto out;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
        rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -2154,8 +2144,6 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
        /* extract some info from the scsi command */
        blksize = scsi_prot_interval(sc);
        reftag = scsi_prot_ref_tag(sc);
-       if (reftag == LPFC_INVALID_REFTAG)
-               goto out;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
        rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -2746,8 +2734,6 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 
                src = (struct scsi_dif_tuple *)sg_virt(sgpe);
                start_ref_tag = scsi_prot_ref_tag(cmd);
-               if (start_ref_tag == LPFC_INVALID_REFTAG)
-                       goto out;
                start_app_tag = src->app_tag;
                len = sgpe->length;
                while (src && protsegcnt) {
@@ -3493,11 +3479,11 @@ err:
                             scsi_cmnd->sc_data_direction);
 
        lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
-                       "9084 Cannot setup S/G List for HBA"
-                       "IO segs %d/%d SGL %d SCSI %d: %d %d\n",
+                       "9084 Cannot setup S/G List for HBA "
+                       "IO segs %d/%d SGL %d SCSI %d: %d %d %d\n",
                        lpfc_cmd->seg_cnt, lpfc_cmd->prot_seg_cnt,
                        phba->cfg_total_seg_cnt, phba->cfg_sg_seg_cnt,
-                       prot_group_type, num_sge);
+                       prot_group_type, num_sge, ret);
 
        lpfc_cmd->seg_cnt = 0;
        lpfc_cmd->prot_seg_cnt = 0;
index 2a31ddc..7825765 100644 (file)
@@ -31,6 +31,7 @@ static void qedf_remove(struct pci_dev *pdev);
 static void qedf_shutdown(struct pci_dev *pdev);
 static void qedf_schedule_recovery_handler(void *dev);
 static void qedf_recovery_handler(struct work_struct *work);
+static int qedf_suspend(struct pci_dev *pdev, pm_message_t state);
 
 /*
  * Driver module parameters.
@@ -3271,6 +3272,7 @@ static struct pci_driver qedf_pci_driver = {
        .probe = qedf_probe,
        .remove = qedf_remove,
        .shutdown = qedf_shutdown,
+       .suspend = qedf_suspend,
 };
 
 static int __qedf_probe(struct pci_dev *pdev, int mode)
@@ -4000,6 +4002,22 @@ static void qedf_shutdown(struct pci_dev *pdev)
        __qedf_remove(pdev, QEDF_MODE_NORMAL);
 }
 
+static int qedf_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct qedf_ctx *qedf;
+
+       if (!pdev) {
+               QEDF_ERR(NULL, "pdev is NULL.\n");
+               return -ENODEV;
+       }
+
+       qedf = pci_get_drvdata(pdev);
+
+       QEDF_ERR(&qedf->dbg_ctx, "%s: Device does not support suspend operation\n", __func__);
+
+       return -EPERM;
+}
+
 /*
  * Recovery handler code
  */
index 450522b..cd0180b 100644 (file)
@@ -69,6 +69,7 @@ static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi);
 static void qedi_recovery_handler(struct work_struct *work);
 static void qedi_schedule_hw_err_handler(void *dev,
                                         enum qed_hw_err_type err_type);
+static int qedi_suspend(struct pci_dev *pdev, pm_message_t state);
 
 static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle)
 {
@@ -1976,8 +1977,9 @@ static int qedi_cpu_offline(unsigned int cpu)
        struct qedi_percpu_s *p = this_cpu_ptr(&qedi_percpu);
        struct qedi_work *work, *tmp;
        struct task_struct *thread;
+       unsigned long flags;
 
-       spin_lock_bh(&p->p_work_lock);
+       spin_lock_irqsave(&p->p_work_lock, flags);
        thread = p->iothread;
        p->iothread = NULL;
 
@@ -1988,7 +1990,7 @@ static int qedi_cpu_offline(unsigned int cpu)
                        kfree(work);
        }
 
-       spin_unlock_bh(&p->p_work_lock);
+       spin_unlock_irqrestore(&p->p_work_lock, flags);
        if (thread)
                kthread_stop(thread);
        return 0;
@@ -2510,6 +2512,22 @@ static void qedi_shutdown(struct pci_dev *pdev)
        __qedi_remove(pdev, QEDI_MODE_SHUTDOWN);
 }
 
+static int qedi_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct qedi_ctx *qedi;
+
+       if (!pdev) {
+               QEDI_ERR(NULL, "pdev is NULL.\n");
+               return -ENODEV;
+       }
+
+       qedi = pci_get_drvdata(pdev);
+
+       QEDI_ERR(&qedi->dbg_ctx, "%s: Device does not support suspend operation\n", __func__);
+
+       return -EPERM;
+}
+
 static int __qedi_probe(struct pci_dev *pdev, int mode)
 {
        struct qedi_ctx *qedi;
@@ -2868,6 +2886,7 @@ static struct pci_driver qedi_pci_driver = {
        .remove = qedi_remove,
        .shutdown = qedi_shutdown,
        .err_handler = &qedi_err_handler,
+       .suspend = qedi_suspend,
 };
 
 static int __init qedi_init(void)
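Both qedf and qedi above wire up a legacy PCI .suspend callback that refuses the request, so the PM core aborts system suspend instead of suspending an adapter the drivers cannot support in that state. A minimal sketch of the same pattern for a hypothetical driver:

#include <linux/pci.h>

static int demo_suspend(struct pci_dev *pdev, pm_message_t state)
{
        dev_err(&pdev->dev, "suspend is not supported\n");
        return -EPERM;
}

static struct pci_driver demo_pci_driver = {
        .name    = "demo",
        /* .id_table, .probe, .remove, .shutdown omitted for brevity */
        .suspend = demo_suspend,
};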
index 898a0bd..95a86e0 100644 (file)
@@ -209,53 +209,6 @@ raid_attr_ro_state(level);
 raid_attr_ro_fn(resync);
 raid_attr_ro_state_fn(state);
 
-static void raid_component_release(struct device *dev)
-{
-       struct raid_component *rc =
-               container_of(dev, struct raid_component, dev);
-       dev_printk(KERN_ERR, rc->dev.parent, "COMPONENT RELEASE\n");
-       put_device(rc->dev.parent);
-       kfree(rc);
-}
-
-int raid_component_add(struct raid_template *r,struct device *raid_dev,
-                      struct device *component_dev)
-{
-       struct device *cdev =
-               attribute_container_find_class_device(&r->raid_attrs.ac,
-                                                     raid_dev);
-       struct raid_component *rc;
-       struct raid_data *rd = dev_get_drvdata(cdev);
-       int err;
-
-       rc = kzalloc(sizeof(*rc), GFP_KERNEL);
-       if (!rc)
-               return -ENOMEM;
-
-       INIT_LIST_HEAD(&rc->node);
-       device_initialize(&rc->dev);
-       rc->dev.release = raid_component_release;
-       rc->dev.parent = get_device(component_dev);
-       rc->num = rd->component_count++;
-
-       dev_set_name(&rc->dev, "component-%d", rc->num);
-       list_add_tail(&rc->node, &rd->component_list);
-       rc->dev.class = &raid_class.class;
-       err = device_add(&rc->dev);
-       if (err)
-               goto err_out;
-
-       return 0;
-
-err_out:
-       list_del(&rc->node);
-       rd->component_count--;
-       put_device(component_dev);
-       kfree(rc);
-       return err;
-}
-EXPORT_SYMBOL(raid_component_add);
-
 struct raid_template *
 raid_class_attach(struct raid_function_template *ft)
 {
index 4a6eb17..41f23cd 100644 (file)
@@ -406,7 +406,7 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
                               size_t length, loff_t *ppos)
 {
        int host, channel, id, lun;
-       char *buffer, *p;
+       char *buffer, *end, *p;
        int err;
 
        if (!buf || length > PAGE_SIZE)
@@ -421,10 +421,14 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
                goto out;
 
        err = -EINVAL;
-       if (length < PAGE_SIZE)
-               buffer[length] = '\0';
-       else if (buffer[PAGE_SIZE-1])
-               goto out;
+       if (length < PAGE_SIZE) {
+               end = buffer + length;
+               *end = '\0';
+       } else {
+               end = buffer + PAGE_SIZE - 1;
+               if (*end)
+                       goto out;
+       }
 
        /*
         * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi
@@ -433,10 +437,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
        if (!strncmp("scsi add-single-device", buffer, 22)) {
                p = buffer + 23;
 
-               host = simple_strtoul(p, &p, 0);
-               channel = simple_strtoul(p + 1, &p, 0);
-               id = simple_strtoul(p + 1, &p, 0);
-               lun = simple_strtoul(p + 1, &p, 0);
+               host    = (p     < end) ? simple_strtoul(p, &p, 0) : 0;
+               channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+               id      = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+               lun     = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
 
                err = scsi_add_single_device(host, channel, id, lun);
 
@@ -447,10 +451,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
        } else if (!strncmp("scsi remove-single-device", buffer, 25)) {
                p = buffer + 26;
 
-               host = simple_strtoul(p, &p, 0);
-               channel = simple_strtoul(p + 1, &p, 0);
-               id = simple_strtoul(p + 1, &p, 0);
-               lun = simple_strtoul(p + 1, &p, 0);
+               host    = (p     < end) ? simple_strtoul(p, &p, 0) : 0;
+               channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+               id      = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+               lun     = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
 
                err = scsi_remove_single_device(host, channel, id, lun);
        }
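The scsi_proc change above bounds every simple_strtoul() call by the end of the user-supplied buffer, so a short write such as "scsi add-single-device 0 1" can no longer make the parser scan past the terminator. A userspace illustration of the same guard, simplified and not the kernel code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void parse_ids(char *buffer, size_t length)
{
        char *end = buffer + length;
        char *p = buffer;
        unsigned long host, channel, id, lun;

        host    = (p     < end) ? strtoul(p, &p, 0)     : 0;
        channel = (p + 1 < end) ? strtoul(p + 1, &p, 0) : 0;
        id      = (p + 1 < end) ? strtoul(p + 1, &p, 0) : 0;
        lun     = (p + 1 < end) ? strtoul(p + 1, &p, 0) : 0;

        printf("%lu %lu %lu %lu\n", host, channel, id, lun);
}

int main(void)
{
        char buf[] = "0 1 2";           /* short input: lun is missing */
        parse_ids(buf, strlen(buf));    /* prints "0 1 2 0", no over-read */
        return 0;
}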
index 3e2e578..4db3ba6 100644 (file)
@@ -307,7 +307,7 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid)
                spin_lock_irqsave(snic->shost->host_lock, flags);
                list_del(&tgt->list);
                spin_unlock_irqrestore(snic->shost->host_lock, flags);
-               kfree(tgt);
+               put_device(&tgt->dev);
                tgt = NULL;
 
                return tgt;
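The snic fix above follows the driver-core rule that once device_initialize() has run the object is reference counted, so error paths must drop it with put_device(), which invokes the ->release callback and frees the memory, rather than calling kfree() directly. A minimal sketch of that pattern with illustrative names:

#include <linux/device.h>
#include <linux/slab.h>

struct demo_tgt {
        struct device dev;
};

static void demo_tgt_release(struct device *dev)
{
        kfree(container_of(dev, struct demo_tgt, dev));
}

static struct demo_tgt *demo_tgt_create(struct device *parent)
{
        struct demo_tgt *tgt = kzalloc(sizeof(*tgt), GFP_KERNEL);

        if (!tgt)
                return NULL;

        device_initialize(&tgt->dev);
        tgt->dev.parent = parent;
        tgt->dev.release = demo_tgt_release;

        if (device_add(&tgt->dev)) {
                put_device(&tgt->dev);  /* not kfree(): drops the ref, runs release */
                return NULL;
        }
        return tgt;
}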
index f282321..047ffaf 100644 (file)
@@ -1674,10 +1674,6 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
  */
 static enum scsi_timeout_action storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
 {
-#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
-       if (scmnd->device->host->transportt == fc_transport_template)
-               return fc_eh_timed_out(scmnd);
-#endif
        return SCSI_EH_RESET_TIMER;
 }
 
index 1ca1403..3f75912 100644 (file)
@@ -137,6 +137,7 @@ static int __init aspeed_socinfo_init(void)
 
        soc_dev = soc_device_register(attrs);
        if (IS_ERR(soc_dev)) {
+               kfree(attrs->machine);
                kfree(attrs->soc_id);
                kfree(attrs->serial_number);
                kfree(attrs);
index ef8b24f..59123e1 100644 (file)
@@ -524,7 +524,7 @@ static ssize_t aspeed_uart_routing_store(struct device *dev,
        struct aspeed_uart_routing_selector *sel = to_routing_selector(attr);
        int val;
 
-       val = match_string(sel->options, -1, buf);
+       val = __sysfs_match_string(sel->options, -1, buf);
        if (val < 0) {
                dev_err(dev, "invalid value \"%s\"\n", buf);
                return -EINVAL;
index de8fe3c..9b02139 100644 (file)
@@ -317,12 +317,6 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx)
        xspi->rx_bytes -= nrx;
 
        while (ntx || nrx) {
-               /* When xspi in busy condition, bytes may send failed,
-                * then spi control did't work thoroughly, add one byte delay
-                */
-               if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL)
-                       udelay(10);
-
                if (ntx) {
                        if (xspi->txbuf)
                                cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
@@ -392,6 +386,11 @@ static irqreturn_t cdns_spi_irq(int irq, void *dev_id)
                if (xspi->tx_bytes) {
                        cdns_spi_process_fifo(xspi, trans_cnt, trans_cnt);
                } else {
+                       /* Fixed delay due to controller limitation with
+                        * RX_NEMPTY incorrect status
+                        * Xilinx AR:65885 contains more details
+                        */
+                       udelay(10);
                        cdns_spi_process_fifo(xspi, 0, trans_cnt);
                        cdns_spi_write(xspi, CDNS_SPI_IDR,
                                       CDNS_SPI_IXR_DEFAULT);
@@ -439,12 +438,18 @@ static int cdns_transfer_one(struct spi_controller *ctlr,
                cdns_spi_setup_transfer(spi, transfer);
        } else {
                /* Set TX empty threshold to half of FIFO depth
-                * only if TX bytes are more than half FIFO depth.
+                * only if TX bytes are more than FIFO depth.
                 */
                if (xspi->tx_bytes > xspi->tx_fifo_depth)
                        cdns_spi_write(xspi, CDNS_SPI_THLD, xspi->tx_fifo_depth >> 1);
        }
 
+       /* When xspi in busy condition, bytes may send failed,
+        * then spi control didn't work thoroughly, add one byte delay
+        */
+       if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL)
+               udelay(10);
+
        cdns_spi_process_fifo(xspi, xspi->tx_fifo_depth, 0);
        spi_transfer_delay_exec(transfer);
 
index 6d10fa4..7ddf9db 100644 (file)
@@ -1001,9 +1001,9 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl,
        if (spi->cfg->set_number_of_data) {
                int ret;
 
-               ret = spi_split_transfers_maxsize(ctrl, msg,
-                                                 STM32H7_SPI_TSIZE_MAX,
-                                                 GFP_KERNEL | GFP_DMA);
+               ret = spi_split_transfers_maxwords(ctrl, msg,
+                                                  STM32H7_SPI_TSIZE_MAX,
+                                                  GFP_KERNEL | GFP_DMA);
                if (ret)
                        return ret;
        }
index 62b26b7..3fb4553 100644 (file)
@@ -1964,6 +1964,8 @@ unlock:
 
        pm_runtime_mark_last_busy(&tb->dev);
        pm_runtime_put_autosuspend(&tb->dev);
+
+       kfree(ev);
 }
 
 static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port)
index 1269f41..0dfd1e0 100644 (file)
@@ -579,7 +579,9 @@ int tb_switch_tmu_disable(struct tb_switch *sw)
                 * uni-directional mode and we don't want to change it's TMU
                 * mode.
                 */
-               tb_switch_tmu_rate_write(sw, tmu_rates[TB_SWITCH_TMU_MODE_OFF]);
+               ret = tb_switch_tmu_rate_write(sw, tmu_rates[TB_SWITCH_TMU_MODE_OFF]);
+               if (ret)
+                       return ret;
 
                tb_port_tmu_time_sync_disable(up);
                ret = tb_port_tmu_time_sync_disable(down);
index 341abae..069de55 100644 (file)
@@ -164,6 +164,9 @@ config LEGACY_TIOCSTI
          userspace depends on this functionality to continue operating
          normally.
 
+         Processes which run with CAP_SYS_ADMIN, such as BRLTTY, can
+         use TIOCSTI even when this is set to N.
+
          This functionality can be changed at runtime with the
          dev.tty.legacy_tiocsti sysctl. This configuration option sets
          the default value of the sysctl.
index 1cdefac..739f522 100644 (file)
@@ -3042,12 +3042,13 @@ static void gsm_error(struct gsm_mux *gsm)
 static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
 {
        int i;
-       struct gsm_dlci *dlci = gsm->dlci[0];
+       struct gsm_dlci *dlci;
        struct gsm_msg *txq, *ntxq;
 
        gsm->dead = true;
        mutex_lock(&gsm->mutex);
 
+       dlci = gsm->dlci[0];
        if (dlci) {
                if (disc && dlci->state != DLCI_CLOSED) {
                        gsm_dlci_begin_close(dlci);
index 914e0e6..3449f87 100644 (file)
@@ -497,6 +497,7 @@ static struct uart_8250_port *serial8250_setup_port(int index)
 
        up = &serial8250_ports[index];
        up->port.line = index;
+       up->port.port_id = index;
 
        serial8250_init_port(up);
        if (!base_ops)
@@ -1040,6 +1041,7 @@ int serial8250_register_8250_port(const struct uart_8250_port *up)
                        uart_remove_one_port(&serial8250_reg, &uart->port);
 
                uart->port.ctrl_id      = up->port.ctrl_id;
+               uart->port.port_id      = up->port.port_id;
                uart->port.iobase       = up->port.iobase;
                uart->port.membase      = up->port.membase;
                uart->port.irq          = up->port.irq;
@@ -1202,6 +1204,7 @@ void serial8250_unregister_port(int line)
                uart->port.flags &= ~UPF_BOOT_AUTOCONF;
                uart->port.type = PORT_UNKNOWN;
                uart->port.dev = &serial8250_isa_devs->dev;
+               uart->port.port_id = line;
                uart->capabilities = 0;
                serial8250_init_port(uart);
                serial8250_apply_quirks(uart);
index 16aeb14..483bb55 100644 (file)
@@ -703,9 +703,6 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
 
 static void serial8250_clear_IER(struct uart_8250_port *up)
 {
-       /* Port locked to synchronize UART_IER access against the console. */
-       lockdep_assert_held_once(&up->port.lock);
-
        if (up->capabilities & UART_CAP_UUE)
                serial_out(up, UART_IER, UART_IER_UUE);
        else
@@ -3278,6 +3275,7 @@ void serial8250_init_port(struct uart_8250_port *up)
 
        spin_lock_init(&port->lock);
        port->ctrl_id = 0;
+       port->pm = NULL;
        port->ops = &serial8250_pops;
        port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
 
index 4d80fae..c569a08 100644 (file)
@@ -1139,8 +1139,8 @@ static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
                unsigned long sr = lpuart32_read(&sport->port, UARTSTAT);
 
                if (sr & (UARTSTAT_PE | UARTSTAT_FE)) {
-                       /* Read DR to clear the error flags */
-                       lpuart32_read(&sport->port, UARTDATA);
+                       /* Clear the error flags */
+                       lpuart32_write(&sport->port, sr, UARTSTAT);
 
                        if (sr & UARTSTAT_PE)
                                sport->port.icount.parity++;
index 9faac0f..c74c548 100644 (file)
@@ -16,6 +16,7 @@ struct device;
 
 struct serial_ctrl_device {
        struct device dev;
+       struct ida port_ida;
 };
 
 struct serial_port_device {
index 6ff59c8..3dfcf20 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/container_of.h>
 #include <linux/device.h>
+#include <linux/idr.h>
 #include <linux/module.h>
 #include <linux/serial_core.h>
 #include <linux/slab.h>
 
 static bool serial_base_initialized;
 
+static const struct device_type serial_ctrl_type = {
+       .name = "ctrl",
+};
+
+static const struct device_type serial_port_type = {
+       .name = "port",
+};
+
 static int serial_base_match(struct device *dev, struct device_driver *drv)
 {
-       int len = strlen(drv->name);
+       if (dev->type == &serial_ctrl_type &&
+           str_has_prefix(drv->name, serial_ctrl_type.name))
+               return 1;
 
-       return !strncmp(dev_name(dev), drv->name, len);
+       if (dev->type == &serial_port_type &&
+           str_has_prefix(drv->name, serial_port_type.name))
+               return 1;
+
+       return 0;
 }
 
 static struct bus_type serial_base_bus_type = {
@@ -48,7 +63,8 @@ static int serial_base_device_init(struct uart_port *port,
                                   struct device *parent_dev,
                                   const struct device_type *type,
                                   void (*release)(struct device *dev),
-                                  int id)
+                                  unsigned int ctrl_id,
+                                  unsigned int port_id)
 {
        device_initialize(dev);
        dev->type = type;
@@ -61,12 +77,15 @@ static int serial_base_device_init(struct uart_port *port,
                return -EPROBE_DEFER;
        }
 
-       return dev_set_name(dev, "%s.%s.%d", type->name, dev_name(port->dev), id);
-}
+       if (type == &serial_ctrl_type)
+               return dev_set_name(dev, "%s:%d", dev_name(port->dev), ctrl_id);
 
-static const struct device_type serial_ctrl_type = {
-       .name = "ctrl",
-};
+       if (type == &serial_port_type)
+               return dev_set_name(dev, "%s:%d.%d", dev_name(port->dev),
+                                   ctrl_id, port_id);
+
+       return -EINVAL;
+}
 
 static void serial_base_ctrl_release(struct device *dev)
 {
@@ -81,6 +100,7 @@ void serial_base_ctrl_device_remove(struct serial_ctrl_device *ctrl_dev)
                return;
 
        device_del(&ctrl_dev->dev);
+       put_device(&ctrl_dev->dev);
 }
 
 struct serial_ctrl_device *serial_base_ctrl_add(struct uart_port *port,
@@ -93,10 +113,12 @@ struct serial_ctrl_device *serial_base_ctrl_add(struct uart_port *port,
        if (!ctrl_dev)
                return ERR_PTR(-ENOMEM);
 
+       ida_init(&ctrl_dev->port_ida);
+
        err = serial_base_device_init(port, &ctrl_dev->dev,
                                      parent, &serial_ctrl_type,
                                      serial_base_ctrl_release,
-                                     port->ctrl_id);
+                                     port->ctrl_id, 0);
        if (err)
                goto err_put_device;
 
@@ -112,10 +134,6 @@ err_put_device:
        return ERR_PTR(err);
 }
 
-static const struct device_type serial_port_type = {
-       .name = "port",
-};
-
 static void serial_base_port_release(struct device *dev)
 {
        struct serial_port_device *port_dev = to_serial_base_port_device(dev);
@@ -127,16 +145,31 @@ struct serial_port_device *serial_base_port_add(struct uart_port *port,
                                                struct serial_ctrl_device *ctrl_dev)
 {
        struct serial_port_device *port_dev;
+       int min = 0, max = -1;  /* Use -1 for max to apply IDA defaults */
        int err;
 
        port_dev = kzalloc(sizeof(*port_dev), GFP_KERNEL);
        if (!port_dev)
                return ERR_PTR(-ENOMEM);
 
+       /* Device driver specified port_id vs automatic assignment? */
+       if (port->port_id) {
+               min = port->port_id;
+               max = port->port_id;
+       }
+
+       err = ida_alloc_range(&ctrl_dev->port_ida, min, max, GFP_KERNEL);
+       if (err < 0) {
+               kfree(port_dev);
+               return ERR_PTR(err);
+       }
+
+       port->port_id = err;
+
        err = serial_base_device_init(port, &port_dev->dev,
                                      &ctrl_dev->dev, &serial_port_type,
                                      serial_base_port_release,
-                                     port->line);
+                                     port->ctrl_id, port->port_id);
        if (err)
                goto err_put_device;
 
@@ -150,16 +183,25 @@ struct serial_port_device *serial_base_port_add(struct uart_port *port,
 
 err_put_device:
        put_device(&port_dev->dev);
+       ida_free(&ctrl_dev->port_ida, port->port_id);
 
        return ERR_PTR(err);
 }
 
 void serial_base_port_device_remove(struct serial_port_device *port_dev)
 {
+       struct serial_ctrl_device *ctrl_dev;
+       struct device *parent;
+
        if (!port_dev)
                return;
 
+       parent = port_dev->dev.parent;
+       ctrl_dev = to_serial_base_ctrl_device(parent);
+
        device_del(&port_dev->dev);
+       ida_free(&ctrl_dev->port_ida, port_dev->port->port_id);
+       put_device(&port_dev->dev);
 }
 
 static int serial_base_init(void)
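The serial core changes above give every controller device its own IDA so its child port devices get stable "<parent>:<ctrl_id>.<port_id>" names, and a driver-supplied port_id (as the 8250 core now sets) is honoured by pinning the allocation range to that single value. Below is a small sketch of the allocation pattern with illustrative names.

#include <linux/idr.h>
#include <linux/slab.h>

static int demo_assign_port_id(struct ida *port_ida, unsigned int requested)
{
        int min = 0, max = -1;  /* -1 lets the IDA pick any free id */

        if (requested) {        /* driver asked for a specific id */
                min = requested;
                max = requested;
        }
        return ida_alloc_range(port_ida, min, max, GFP_KERNEL);
}

static void demo_release_port_id(struct ida *port_ida, unsigned int id)
{
        ida_free(port_ida, id);
}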
index 6fb0e00..386674e 100644 (file)
@@ -580,7 +580,6 @@ static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
 {
        struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
        struct utp_transfer_req_desc *utrd;
-       u32 mask = hwq->max_entries - 1;
        __le64  cmd_desc_base_addr;
        bool ret = false;
        u64 addr, match;
@@ -608,7 +607,10 @@ static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
                        ret = true;
                        goto out;
                }
-               sq_head_slot = (sq_head_slot + 1) & mask;
+
+               sq_head_slot++;
+               if (sq_head_slot == hwq->max_entries)
+                       sq_head_slot = 0;
        }
 
 out:
index 8d6fd4c..c1557d2 100644 (file)
@@ -321,7 +321,7 @@ static void ufs_qcom_select_unipro_mode(struct ufs_qcom_host *host)
                   ufs_qcom_cap_qunipro(host) ? QUNIPRO_SEL : 0,
                   REG_UFS_CFG1);
 
-       if (host->hw_ver.major == 0x05)
+       if (host->hw_ver.major >= 0x05)
                ufshcd_rmwl(host->hba, QUNIPRO_G4_SEL, 0, REG_UFS_CFG0);
 
        /* make sure above configuration is applied before we return */
index f8a5e79..ab0652d 100644 (file)
@@ -359,7 +359,7 @@ static int ufs_renesas_init(struct ufs_hba *hba)
 {
        struct ufs_renesas_priv *priv;
 
-       priv = devm_kmalloc(hba->dev, sizeof(*priv), GFP_KERNEL);
+       priv = devm_kzalloc(hba->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
        ufshcd_set_variant(hba, priv);
index 766005d..501e8bc 100644 (file)
@@ -42,6 +42,7 @@ struct usb_conn_info {
 
        struct power_supply_desc desc;
        struct power_supply *charger;
+       bool initial_detection;
 };
 
 /*
@@ -86,11 +87,13 @@ static void usb_conn_detect_cable(struct work_struct *work)
        dev_dbg(info->dev, "role %s -> %s, gpios: id %d, vbus %d\n",
                usb_role_string(info->last_role), usb_role_string(role), id, vbus);
 
-       if (info->last_role == role) {
+       if (!info->initial_detection && info->last_role == role) {
                dev_warn(info->dev, "repeated role: %s\n", usb_role_string(role));
                return;
        }
 
+       info->initial_detection = false;
+
        if (info->last_role == USB_ROLE_HOST && info->vbus)
                regulator_disable(info->vbus);
 
@@ -258,6 +261,7 @@ static int usb_conn_probe(struct platform_device *pdev)
        device_set_wakeup_capable(&pdev->dev, true);
 
        /* Perform initial detection */
+       info->initial_detection = true;
        usb_conn_queue_dwork(info, 0);
 
        return 0;
index 5fd0671..858fe4c 100644 (file)
@@ -4455,9 +4455,14 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
        u32 count;
 
        if (pm_runtime_suspended(dwc->dev)) {
+               dwc->pending_events = true;
+               /*
+                * Trigger runtime resume. The get() function will be balanced
+                * after processing the pending events in dwc3_process_pending
+                * events().
+                */
                pm_runtime_get(dwc->dev);
                disable_irq_nosync(dwc->irq_gadget);
-               dwc->pending_events = true;
                return IRQ_HANDLED;
        }
 
@@ -4718,6 +4723,8 @@ void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
 {
        if (dwc->pending_events) {
                dwc3_interrupt(dwc->irq_gadget, dwc->ev_buf);
+               dwc3_thread_interrupt(dwc->irq_gadget, dwc->ev_buf);
+               pm_runtime_put(dwc->dev);
                dwc->pending_events = false;
                enable_irq(dwc->irq_gadget);
        }
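The dwc3 hunks above document and complete a runtime-PM reference contract: when an interrupt arrives while the controller is runtime suspended, the handler records pending_events and takes a reference with pm_runtime_get() to trigger resume, and dwc3_gadget_process_pending_events() now also runs the threaded handler and drops that reference with pm_runtime_put(). A generic sketch of the pairing, with illustrative names:

#include <linux/interrupt.h>
#include <linux/pm_runtime.h>

struct demo_ctx {
        struct device *dev;
        int irq;
        bool pending_events;
};

static irqreturn_t demo_handle_events(struct demo_ctx *ctx);   /* hypothetical */

static irqreturn_t demo_irq(int irq, void *data)
{
        struct demo_ctx *ctx = data;

        if (pm_runtime_suspended(ctx->dev)) {
                ctx->pending_events = true;
                pm_runtime_get(ctx->dev);       /* async resume; put happens later */
                disable_irq_nosync(irq);
                return IRQ_HANDLED;
        }
        return demo_handle_events(ctx);
}

static void demo_process_pending(struct demo_ctx *ctx)
{
        if (!ctx->pending_events)
                return;

        demo_handle_events(ctx);
        pm_runtime_put(ctx->dev);               /* balances the get() above */
        ctx->pending_events = false;
        enable_irq(ctx->irq);
}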
index cd58f2a..7d49d8a 100644 (file)
@@ -822,6 +822,9 @@ EXPORT_SYMBOL_GPL(usb_gadget_disconnect);
  * usb_gadget_activate() is called.  For example, user mode components may
  * need to be activated before the system can talk to hosts.
  *
+ * This routine may sleep; it must not be called in interrupt context
+ * (such as from within a gadget driver's disconnect() callback).
+ *
  * Returns zero on success, else negative errno.
  */
 int usb_gadget_deactivate(struct usb_gadget *gadget)
@@ -860,6 +863,8 @@ EXPORT_SYMBOL_GPL(usb_gadget_deactivate);
  * This routine activates gadget which was previously deactivated with
  * usb_gadget_deactivate() call. It calls usb_gadget_connect() if needed.
  *
+ * This routine may sleep; it must not be called in interrupt context.
+ *
  * Returns zero on success, else negative errno.
  */
 int usb_gadget_activate(struct usb_gadget *gadget)
@@ -1638,7 +1643,11 @@ static void gadget_unbind_driver(struct device *dev)
        usb_gadget_disable_async_callbacks(udc);
        if (gadget->irq)
                synchronize_irq(gadget->irq);
+       mutex_unlock(&udc->connect_lock);
+
        udc->driver->unbind(gadget);
+
+       mutex_lock(&udc->connect_lock);
        usb_gadget_udc_stop_locked(udc);
        mutex_unlock(&udc->connect_lock);
 
index 5e912dd..115f05a 100644 (file)
@@ -318,7 +318,8 @@ static int alauda_get_media_status(struct us_data *us, unsigned char *data)
        rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe,
                command, 0xc0, 0, 1, data, 2);
 
-       usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
+       if (rc == USB_STOR_XFER_GOOD)
+               usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
 
        return rc;
 }
@@ -454,9 +455,14 @@ static int alauda_init_media(struct us_data *us)
 static int alauda_check_media(struct us_data *us)
 {
        struct alauda_info *info = (struct alauda_info *) us->extra;
-       unsigned char status[2];
+       unsigned char *status = us->iobuf;
+       int rc;
 
-       alauda_get_media_status(us, status);
+       rc = alauda_get_media_status(us, status);
+       if (rc != USB_STOR_XFER_GOOD) {
+               status[0] = 0xF0;       /* Pretend there's no media */
+               status[1] = 0;
+       }
 
        /* Check for no media or door open */
        if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10)
index 66de880..cdf8261 100644 (file)
@@ -60,6 +60,7 @@ struct dp_altmode {
 
        enum dp_state state;
        bool hpd;
+       bool pending_hpd;
 
        struct mutex lock; /* device lock */
        struct work_struct work;
@@ -144,8 +145,13 @@ static int dp_altmode_status_update(struct dp_altmode *dp)
                dp->state = DP_STATE_EXIT;
        } else if (!(con & DP_CONF_CURRENTLY(dp->data.conf))) {
                ret = dp_altmode_configure(dp, con);
-               if (!ret)
+               if (!ret) {
                        dp->state = DP_STATE_CONFIGURE;
+                       if (dp->hpd != hpd) {
+                               dp->hpd = hpd;
+                               dp->pending_hpd = true;
+                       }
+               }
        } else {
                if (dp->hpd != hpd) {
                        drm_connector_oob_hotplug_event(dp->connector_fwnode);
@@ -161,6 +167,16 @@ static int dp_altmode_configured(struct dp_altmode *dp)
 {
        sysfs_notify(&dp->alt->dev.kobj, "displayport", "configuration");
        sysfs_notify(&dp->alt->dev.kobj, "displayport", "pin_assignment");
+       /*
+        * If the DFP_D/UFP_D sends a change in HPD when first notifying the
+        * DisplayPort driver that it is connected, then we wait until
+        * configuration is complete to signal HPD.
+        */
+       if (dp->pending_hpd) {
+               drm_connector_oob_hotplug_event(dp->connector_fwnode);
+               sysfs_notify(&dp->alt->dev.kobj, "displayport", "hpd");
+               dp->pending_hpd = false;
+       }
 
        return dp_altmode_notify(dp);
 }
index 784b9d8..65da611 100644 (file)
@@ -29,6 +29,7 @@ config TYPEC_MUX_INTEL_PMC
        tristate "Intel PMC mux control"
        depends on ACPI
        depends on INTEL_SCU_IPC
+       select USB_COMMON
        select USB_ROLE_SWITCH
        help
          Driver for USB muxes controlled by Intel PMC FW. Intel PMC FW can
index 80e580d..4d1122d 100644 (file)
@@ -463,16 +463,18 @@ static int nb7vpq904m_probe(struct i2c_client *client)
 
        ret = nb7vpq904m_register_bridge(nb7);
        if (ret)
-               return ret;
+               goto err_disable_gpio;
 
        sw_desc.drvdata = nb7;
        sw_desc.fwnode = dev->fwnode;
        sw_desc.set = nb7vpq904m_sw_set;
 
        nb7->sw = typec_switch_register(dev, &sw_desc);
-       if (IS_ERR(nb7->sw))
-               return dev_err_probe(dev, PTR_ERR(nb7->sw),
-                                    "Error registering typec switch\n");
+       if (IS_ERR(nb7->sw)) {
+               ret = dev_err_probe(dev, PTR_ERR(nb7->sw),
+                                   "Error registering typec switch\n");
+               goto err_disable_gpio;
+       }
 
        retimer_desc.drvdata = nb7;
        retimer_desc.fwnode = dev->fwnode;
@@ -480,12 +482,21 @@ static int nb7vpq904m_probe(struct i2c_client *client)
 
        nb7->retimer = typec_retimer_register(dev, &retimer_desc);
        if (IS_ERR(nb7->retimer)) {
-               typec_switch_unregister(nb7->sw);
-               return dev_err_probe(dev, PTR_ERR(nb7->retimer),
-                                    "Error registering typec retimer\n");
+               ret = dev_err_probe(dev, PTR_ERR(nb7->retimer),
+                                   "Error registering typec retimer\n");
+               goto err_switch_unregister;
        }
 
        return 0;
+
+err_switch_unregister:
+       typec_switch_unregister(nb7->sw);
+
+err_disable_gpio:
+       gpiod_set_value(nb7->enable_gpio, 0);
+       regulator_disable(nb7->vcc_supply);
+
+       return ret;
 }
 
 static void nb7vpq904m_remove(struct i2c_client *client)
index 829d75e..cc1d839 100644 (file)
@@ -5349,6 +5349,10 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
                /* Do nothing, vbus drop expected */
                break;
 
+       case SNK_HARD_RESET_WAIT_VBUS:
+               /* Do nothing, its OK to receive vbus off events */
+               break;
+
        default:
                if (port->pwr_role == TYPEC_SINK && port->attached)
                        tcpm_set_state(port, SNK_UNATTACHED, tcpm_wait_for_discharge(port));
@@ -5395,6 +5399,9 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port)
        case SNK_DEBOUNCED:
                /*Do nothing, still waiting for VSAFE5V for connect */
                break;
+       case SNK_HARD_RESET_WAIT_VBUS:
+               /* Do nothing, its OK to receive vbus off events */
+               break;
        default:
                if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled)
                        tcpm_set_state(port, SNK_UNATTACHED, 0);
index 25fc412..b53420e 100644 (file)
@@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
        struct list_head head;
        unsigned long num_directs;
        unsigned long num_klms;
+       /* state of dvq mr */
        bool initialized;
 
        /* serialize mkey creation and destruction */
@@ -121,6 +122,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io
 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
                        unsigned int asid);
 void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
+void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
 
 #define mlx5_vdpa_warn(__dev, format, ...)                                                         \
        dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__,     \
index 03e5432..5a1971f 100644 (file)
@@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
        }
 }
 
-void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
+static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+               return;
+
+       prune_iotlb(mvdev);
+}
+
+static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
 {
        struct mlx5_vdpa_mr *mr = &mvdev->mr;
 
-       mutex_lock(&mr->mkey_mtx);
+       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
+               return;
+
        if (!mr->initialized)
-               goto out;
+               return;
 
-       prune_iotlb(mvdev);
        if (mr->user_mr)
                destroy_user_mr(mvdev, mr);
        else
                destroy_dma_mr(mvdev, mr);
 
        mr->initialized = false;
-out:
+}
+
+void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+       struct mlx5_vdpa_mr *mr = &mvdev->mr;
+
+       mutex_lock(&mr->mkey_mtx);
+
+       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
+       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
+
        mutex_unlock(&mr->mkey_mtx);
 }
 
-static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
-                               struct vhost_iotlb *iotlb, unsigned int asid)
+void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
+{
+       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
+       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
+}
+
+static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
+                                   struct vhost_iotlb *iotlb,
+                                   unsigned int asid)
+{
+       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+               return 0;
+
+       return dup_iotlb(mvdev, iotlb);
+}
+
+static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
+                                   struct vhost_iotlb *iotlb,
+                                   unsigned int asid)
 {
        struct mlx5_vdpa_mr *mr = &mvdev->mr;
        int err;
 
-       if (mr->initialized)
+       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
                return 0;
 
-       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
-               if (iotlb)
-                       err = create_user_mr(mvdev, iotlb);
-               else
-                       err = create_dma_mr(mvdev, mr);
+       if (mr->initialized)
+               return 0;
 
-               if (err)
-                       return err;
-       }
+       if (iotlb)
+               err = create_user_mr(mvdev, iotlb);
+       else
+               err = create_dma_mr(mvdev, mr);
 
-       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
-               err = dup_iotlb(mvdev, iotlb);
-               if (err)
-                       goto out_err;
-       }
+       if (err)
+               return err;
 
        mr->initialized = true;
+
+       return 0;
+}
+
+static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
+                               struct vhost_iotlb *iotlb, unsigned int asid)
+{
+       int err;
+
+       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
+       if (err)
+               return err;
+
+       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
+       if (err)
+               goto out_err;
+
        return 0;
 
 out_err:
-       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
-               if (iotlb)
-                       destroy_user_mr(mvdev, mr);
-               else
-                       destroy_dma_mr(mvdev, mr);
-       }
+       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
 
        return err;
 }
index 9138ef2..37be945 100644 (file)
@@ -2517,7 +2517,15 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
        else
                ndev->rqt_size = 1;
 
-       ndev->cur_num_vqs = 2 * ndev->rqt_size;
+       /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
+        * 5.1.6.5.5 "Device operation in multiqueue mode":
+        *
+        * Multiqueue is disabled by default.
+        * The driver enables multiqueue by sending a command using class
+        * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
+        * operation, as follows: ...
+        */
+       ndev->cur_num_vqs = 2;
 
        update_cvq_info(mvdev);
        return err;
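
The control command the comment quotes is the VIRTIO_NET_CTRL_MQ class, whose only payload is the requested number of queue pairs. A sketch of the relevant UAPI definitions, reproduced from memory (verify against include/uapi/linux/virtio_net.h before relying on the exact values):

/* Multiqueue control, per VIRTIO 1.2 section 5.1.6.5.5 (sketch). */
typedef unsigned short __virtio16;	/* stand-in for the kernel's typedef */

struct virtio_net_ctrl_mq {
	__virtio16 virtqueue_pairs;	/* requested number of queue pairs */
};

#define VIRTIO_NET_CTRL_MQ			4
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET		0
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN		1
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX		0x8000

Until the guest sends VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, the device has to behave as if only one queue pair exists, which is why cur_num_vqs is reset to 2 (one RX plus one TX queue) rather than 2 * rqt_size.
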
@@ -2636,7 +2644,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
                goto err_mr;
 
        teardown_driver(ndev);
-       mlx5_vdpa_destroy_mr(mvdev);
+       mlx5_vdpa_destroy_mr_asid(mvdev, asid);
        err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
        if (err)
                goto err_mr;
@@ -2652,7 +2660,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
        return 0;
 
 err_setup:
-       mlx5_vdpa_destroy_mr(mvdev);
+       mlx5_vdpa_destroy_mr_asid(mvdev, asid);
 err_mr:
        return err;
 }
@@ -3548,17 +3556,6 @@ static void mlx5v_remove(struct auxiliary_device *adev)
        kfree(mgtdev);
 }
 
-static void mlx5v_shutdown(struct auxiliary_device *auxdev)
-{
-       struct mlx5_vdpa_mgmtdev *mgtdev;
-       struct mlx5_vdpa_net *ndev;
-
-       mgtdev = auxiliary_get_drvdata(auxdev);
-       ndev = mgtdev->ndev;
-
-       free_irqs(ndev);
-}
-
 static const struct auxiliary_device_id mlx5v_id_table[] = {
        { .name = MLX5_ADEV_NAME ".vnet", },
        {},
@@ -3570,7 +3567,6 @@ static struct auxiliary_driver mlx5v_driver = {
        .name = "vnet",
        .probe = mlx5v_probe,
        .remove = mlx5v_remove,
-       .shutdown = mlx5v_shutdown,
        .id_table = mlx5v_id_table,
 };
 
index 2e22418..c2d314d 100644 (file)
@@ -5,6 +5,5 @@ obj-$(CONFIG_PDS_VDPA) := pds_vdpa.o
 
 pds_vdpa-y := aux_drv.o \
              cmds.o \
+             debugfs.o \
              vdpa_dev.o
-
-pds_vdpa-$(CONFIG_DEBUG_FS) += debugfs.o
index 21a0dc0..9b04aad 100644 (file)
@@ -176,6 +176,7 @@ static int identity_show(struct seq_file *seq, void *v)
 {
        struct pds_vdpa_aux *vdpa_aux = seq->private;
        struct vdpa_mgmt_dev *mgmt;
+       u64 hw_features;
 
        seq_printf(seq, "aux_dev:            %s\n",
                   dev_name(&vdpa_aux->padev->aux_dev.dev));
@@ -183,8 +184,9 @@ static int identity_show(struct seq_file *seq, void *v)
        mgmt = &vdpa_aux->vdpa_mdev;
        seq_printf(seq, "max_vqs:            %d\n", mgmt->max_supported_vqs);
        seq_printf(seq, "config_attr_mask:   %#llx\n", mgmt->config_attr_mask);
-       seq_printf(seq, "supported_features: %#llx\n", mgmt->supported_features);
-       print_feature_bits_all(seq, mgmt->supported_features);
+       hw_features = le64_to_cpu(vdpa_aux->ident.hw_features);
+       seq_printf(seq, "hw_features:        %#llx\n", hw_features);
+       print_feature_bits_all(seq, hw_features);
 
        return 0;
 }
@@ -200,7 +202,6 @@ static int config_show(struct seq_file *seq, void *v)
 {
        struct pds_vdpa_device *pdsv = seq->private;
        struct virtio_net_config vc;
-       u64 driver_features;
        u8 status;
 
        memcpy_fromio(&vc, pdsv->vdpa_aux->vd_mdev.device,
@@ -223,12 +224,8 @@ static int config_show(struct seq_file *seq, void *v)
        status = vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev);
        seq_printf(seq, "dev_status:           %#x\n", status);
        print_status_bits(seq, status);
-
-       seq_printf(seq, "req_features:         %#llx\n", pdsv->req_features);
-       print_feature_bits_all(seq, pdsv->req_features);
-       driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev);
-       seq_printf(seq, "driver_features:      %#llx\n", driver_features);
-       print_feature_bits_all(seq, driver_features);
+       seq_printf(seq, "negotiated_features:  %#llx\n", pdsv->negotiated_features);
+       print_feature_bits_all(seq, pdsv->negotiated_features);
        seq_printf(seq, "vdpa_index:           %d\n", pdsv->vdpa_index);
        seq_printf(seq, "num_vqs:              %d\n", pdsv->num_vqs);
 
index 5071a4d..52b2449 100644 (file)
@@ -126,11 +126,9 @@ static void pds_vdpa_release_irq(struct pds_vdpa_device *pdsv, int qid)
 static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready)
 {
        struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
-       struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
        struct device *dev = &pdsv->vdpa_dev.dev;
        u64 driver_features;
        u16 invert_idx = 0;
-       int irq;
        int err;
 
        dev_dbg(dev, "%s: qid %d ready %d => %d\n",
@@ -143,19 +141,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re
                invert_idx = PDS_VDPA_PACKED_INVERT_IDX;
 
        if (ready) {
-               irq = pci_irq_vector(pdev, qid);
-               snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name),
-                        "vdpa-%s-%d", dev_name(dev), qid);
-
-               err = request_irq(irq, pds_vdpa_isr, 0,
-                                 pdsv->vqs[qid].irq_name, &pdsv->vqs[qid]);
-               if (err) {
-                       dev_err(dev, "%s: no irq for qid %d: %pe\n",
-                               __func__, qid, ERR_PTR(err));
-                       return;
-               }
-               pdsv->vqs[qid].irq = irq;
-
                /* Pass vq setup info to DSC using adminq to gather up and
                 * send all info at once so FW can do its full setup in
                 * one easy operation
@@ -164,7 +149,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re
                if (err) {
                        dev_err(dev, "Failed to init vq %d: %pe\n",
                                qid, ERR_PTR(err));
-                       pds_vdpa_release_irq(pdsv, qid);
                        ready = false;
                }
        } else {
@@ -172,7 +156,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re
                if (err)
                        dev_err(dev, "%s: reset_vq failed qid %d: %pe\n",
                                __func__, qid, ERR_PTR(err));
-               pds_vdpa_release_irq(pdsv, qid);
        }
 
        pdsv->vqs[qid].ready = ready;
@@ -318,6 +301,7 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur
        struct device *dev = &pdsv->vdpa_dev.dev;
        u64 driver_features;
        u64 nego_features;
+       u64 hw_features;
        u64 missing;
 
        if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) {
@@ -325,21 +309,26 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur
                return -EOPNOTSUPP;
        }
 
-       pdsv->req_features = features;
-
        /* Check for valid feature bits */
-       nego_features = features & le64_to_cpu(pdsv->vdpa_aux->ident.hw_features);
-       missing = pdsv->req_features & ~nego_features;
+       nego_features = features & pdsv->supported_features;
+       missing = features & ~nego_features;
        if (missing) {
                dev_err(dev, "Can't support all requested features in %#llx, missing %#llx features\n",
-                       pdsv->req_features, missing);
+                       features, missing);
                return -EOPNOTSUPP;
        }
 
+       pdsv->negotiated_features = nego_features;
+
        driver_features = pds_vdpa_get_driver_features(vdpa_dev);
        dev_dbg(dev, "%s: %#llx => %#llx\n",
                __func__, driver_features, nego_features);
 
+       /* if we're faking the F_MAC, strip it before writing to device */
+       hw_features = le64_to_cpu(pdsv->vdpa_aux->ident.hw_features);
+       if (!(hw_features & BIT_ULL(VIRTIO_NET_F_MAC)))
+               nego_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
+
        if (driver_features == nego_features)
                return 0;
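
The negotiation itself is plain bit arithmetic: the accepted set is the intersection of what the driver requested and what the device advertises (including the faked F_MAC bit), any requested bit outside that set is an error, and the faked bit is stripped again before the result is written to hardware. A standalone sketch of that masking, using stand-in bit positions rather than the real VIRTIO feature numbers:

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n)	(1ULL << (n))
#define F_MAC		BIT_ULL(5)	/* stand-in for VIRTIO_NET_F_MAC */

int main(void)
{
	uint64_t hw_features = BIT_ULL(0) | BIT_ULL(1);	/* what the device really offers */
	uint64_t supported   = hw_features | F_MAC;	/* advertised set, F_MAC faked */
	uint64_t requested   = BIT_ULL(0) | F_MAC;	/* what the driver asked for */

	uint64_t negotiated = requested & supported;
	uint64_t missing    = requested & ~negotiated;

	if (missing) {
		printf("missing %#llx\n", (unsigned long long)missing);
		return 1;
	}

	/* Strip the faked bit before telling the hardware about the result. */
	uint64_t to_device = negotiated;
	if (!(hw_features & F_MAC))
		to_device &= ~F_MAC;

	printf("negotiated %#llx, written to device %#llx\n",
	       (unsigned long long)negotiated, (unsigned long long)to_device);
	return 0;
}
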
 
@@ -352,7 +341,7 @@ static u64 pds_vdpa_get_driver_features(struct vdpa_device *vdpa_dev)
 {
        struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
 
-       return vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev);
+       return pdsv->negotiated_features;
 }
 
 static void pds_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
@@ -389,6 +378,72 @@ static u8 pds_vdpa_get_status(struct vdpa_device *vdpa_dev)
        return vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev);
 }
 
+static int pds_vdpa_request_irqs(struct pds_vdpa_device *pdsv)
+{
+       struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
+       struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux;
+       struct device *dev = &pdsv->vdpa_dev.dev;
+       int max_vq, nintrs, qid, err;
+
+       max_vq = vdpa_aux->vdpa_mdev.max_supported_vqs;
+
+       nintrs = pci_alloc_irq_vectors(pdev, max_vq, max_vq, PCI_IRQ_MSIX);
+       if (nintrs < 0) {
+               dev_err(dev, "Couldn't get %d msix vectors: %pe\n",
+                       max_vq, ERR_PTR(nintrs));
+               return nintrs;
+       }
+
+       for (qid = 0; qid < pdsv->num_vqs; ++qid) {
+               int irq = pci_irq_vector(pdev, qid);
+
+               snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name),
+                        "vdpa-%s-%d", dev_name(dev), qid);
+
+               err = request_irq(irq, pds_vdpa_isr, 0,
+                                 pdsv->vqs[qid].irq_name,
+                                 &pdsv->vqs[qid]);
+               if (err) {
+                       dev_err(dev, "%s: no irq for qid %d: %pe\n",
+                               __func__, qid, ERR_PTR(err));
+                       goto err_release;
+               }
+
+               pdsv->vqs[qid].irq = irq;
+       }
+
+       vdpa_aux->nintrs = nintrs;
+
+       return 0;
+
+err_release:
+       while (qid--)
+               pds_vdpa_release_irq(pdsv, qid);
+
+       pci_free_irq_vectors(pdev);
+
+       vdpa_aux->nintrs = 0;
+
+       return err;
+}
+
+static void pds_vdpa_release_irqs(struct pds_vdpa_device *pdsv)
+{
+       struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
+       struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux;
+       int qid;
+
+       if (!vdpa_aux->nintrs)
+               return;
+
+       for (qid = 0; qid < pdsv->num_vqs; qid++)
+               pds_vdpa_release_irq(pdsv, qid);
+
+       pci_free_irq_vectors(pdev);
+
+       vdpa_aux->nintrs = 0;
+}
+
 static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
 {
        struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
@@ -399,6 +454,11 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
        old_status = pds_vdpa_get_status(vdpa_dev);
        dev_dbg(dev, "%s: old %#x new %#x\n", __func__, old_status, status);
 
+       if (status & ~old_status & VIRTIO_CONFIG_S_DRIVER_OK) {
+               if (pds_vdpa_request_irqs(pdsv))
+                       status = old_status | VIRTIO_CONFIG_S_FAILED;
+       }
+
        pds_vdpa_cmd_set_status(pdsv, status);
 
        /* Note: still working with FW on the need for this reset cmd */
@@ -409,6 +469,8 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
                        pdsv->vqs[i].avail_idx = 0;
                        pdsv->vqs[i].used_idx = 0;
                }
+
+               pds_vdpa_cmd_set_mac(pdsv, pdsv->mac);
        }
 
        if (status & ~old_status & VIRTIO_CONFIG_S_FEATURES_OK) {
@@ -418,6 +480,20 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
                                                        i, &pdsv->vqs[i].notify_pa);
                }
        }
+
+       if (old_status & ~status & VIRTIO_CONFIG_S_DRIVER_OK)
+               pds_vdpa_release_irqs(pdsv);
+}
+
+static void pds_vdpa_init_vqs_entry(struct pds_vdpa_device *pdsv, int qid,
+                                   void __iomem *notify)
+{
+       memset(&pdsv->vqs[qid], 0, sizeof(pdsv->vqs[0]));
+       pdsv->vqs[qid].qid = qid;
+       pdsv->vqs[qid].pdsv = pdsv;
+       pdsv->vqs[qid].ready = false;
+       pdsv->vqs[qid].irq = VIRTIO_MSI_NO_VECTOR;
+       pdsv->vqs[qid].notify = notify;
 }
 
 static int pds_vdpa_reset(struct vdpa_device *vdpa_dev)
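
Both the IRQ request path and the release path key off a transition of the DRIVER_OK bit rather than its absolute value: status & ~old_status & BIT fires only on a 0 -> 1 edge, and old_status & ~status & BIT only on a 1 -> 0 edge. A trivial standalone check of that idiom (DRIVER_OK used here as a stand-in value for VIRTIO_CONFIG_S_DRIVER_OK):

#include <stdio.h>

#define DRIVER_OK	0x04u	/* stand-in for VIRTIO_CONFIG_S_DRIVER_OK */

static int rising(unsigned int old, unsigned int new)  { return new & ~old & DRIVER_OK; }
static int falling(unsigned int old, unsigned int new) { return old & ~new & DRIVER_OK; }

int main(void)
{
	printf("0 -> OK : rising=%d falling=%d\n", !!rising(0x00, 0x04), !!falling(0x00, 0x04));
	printf("OK -> OK: rising=%d falling=%d\n", !!rising(0x04, 0x04), !!falling(0x04, 0x04));
	printf("OK -> 0 : rising=%d falling=%d\n", !!rising(0x04, 0x00), !!falling(0x04, 0x00));
	return 0;
}
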
@@ -441,14 +517,17 @@ static int pds_vdpa_reset(struct vdpa_device *vdpa_dev)
                        if (err)
                                dev_err(dev, "%s: reset_vq failed qid %d: %pe\n",
                                        __func__, i, ERR_PTR(err));
-                       pds_vdpa_release_irq(pdsv, i);
-                       memset(&pdsv->vqs[i], 0, sizeof(pdsv->vqs[0]));
-                       pdsv->vqs[i].ready = false;
                }
        }
 
        pds_vdpa_set_status(vdpa_dev, 0);
 
+       if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
+               /* Reset the vq info */
+               for (i = 0; i < pdsv->num_vqs && !err; i++)
+                       pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify);
+       }
+
        return 0;
 }
 
@@ -532,7 +611,6 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
        struct device *dma_dev;
        struct pci_dev *pdev;
        struct device *dev;
-       u8 mac[ETH_ALEN];
        int err;
        int i;
 
@@ -563,7 +641,7 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
 
        if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
                u64 unsupp_features =
-                       add_config->device_features & ~mgmt->supported_features;
+                       add_config->device_features & ~pdsv->supported_features;
 
                if (unsupp_features) {
                        dev_err(dev, "Unsupported features: %#llx\n", unsupp_features);
@@ -614,29 +692,30 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
        }
 
        /* Set a mac, either from the user config if provided
-        * or set a random mac if default is 00:..:00
+        * or use the device's mac if not 00:..:00
+        * or set a random mac
         */
        if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
-               ether_addr_copy(mac, add_config->net.mac);
-               pds_vdpa_cmd_set_mac(pdsv, mac);
+               ether_addr_copy(pdsv->mac, add_config->net.mac);
        } else {
                struct virtio_net_config __iomem *vc;
 
                vc = pdsv->vdpa_aux->vd_mdev.device;
-               memcpy_fromio(mac, vc->mac, sizeof(mac));
-               if (is_zero_ether_addr(mac)) {
-                       eth_random_addr(mac);
-                       dev_info(dev, "setting random mac %pM\n", mac);
-                       pds_vdpa_cmd_set_mac(pdsv, mac);
+               memcpy_fromio(pdsv->mac, vc->mac, sizeof(pdsv->mac));
+               if (is_zero_ether_addr(pdsv->mac) &&
+                   (pdsv->supported_features & BIT_ULL(VIRTIO_NET_F_MAC))) {
+                       eth_random_addr(pdsv->mac);
+                       dev_info(dev, "setting random mac %pM\n", pdsv->mac);
                }
        }
+       pds_vdpa_cmd_set_mac(pdsv, pdsv->mac);
 
        for (i = 0; i < pdsv->num_vqs; i++) {
-               pdsv->vqs[i].qid = i;
-               pdsv->vqs[i].pdsv = pdsv;
-               pdsv->vqs[i].irq = VIRTIO_MSI_NO_VECTOR;
-               pdsv->vqs[i].notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev,
-                                                             i, &pdsv->vqs[i].notify_pa);
+               void __iomem *notify;
+
+               notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev,
+                                                i, &pdsv->vqs[i].notify_pa);
+               pds_vdpa_init_vqs_entry(pdsv, i, notify);
        }
 
        pdsv->vdpa_dev.mdev = &vdpa_aux->vdpa_mdev;
@@ -746,24 +825,19 @@ int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux)
 
        max_vqs = min_t(u16, dev_intrs, max_vqs);
        mgmt->max_supported_vqs = min_t(u16, PDS_VDPA_MAX_QUEUES, max_vqs);
-       vdpa_aux->nintrs = mgmt->max_supported_vqs;
+       vdpa_aux->nintrs = 0;
 
        mgmt->ops = &pds_vdpa_mgmt_dev_ops;
        mgmt->id_table = pds_vdpa_id_table;
        mgmt->device = dev;
        mgmt->supported_features = le64_to_cpu(vdpa_aux->ident.hw_features);
+
+       /* advertise F_MAC even if the device doesn't */
+       mgmt->supported_features |= BIT_ULL(VIRTIO_NET_F_MAC);
+
        mgmt->config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
        mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
        mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES);
 
-       err = pci_alloc_irq_vectors(pdev, vdpa_aux->nintrs, vdpa_aux->nintrs,
-                                   PCI_IRQ_MSIX);
-       if (err < 0) {
-               dev_err(dev, "Couldn't get %d msix vectors: %pe\n",
-                       vdpa_aux->nintrs, ERR_PTR(err));
-               return err;
-       }
-       vdpa_aux->nintrs = err;
-
        return 0;
 }
index a1bc37d..d984ba2 100644 (file)
@@ -35,10 +35,11 @@ struct pds_vdpa_device {
        struct pds_vdpa_aux *vdpa_aux;
 
        struct pds_vdpa_vq_info vqs[PDS_VDPA_MAX_QUEUES];
-       u64 supported_features;         /* specified device features */
-       u64 req_features;               /* features requested by vdpa */
+       u64 supported_features;         /* supported device features */
+       u64 negotiated_features;        /* negotiated features */
        u8 vdpa_index;                  /* rsvd for future subdevice use */
        u8 num_vqs;                     /* num vqs in use */
+       u8 mac[ETH_ALEN];               /* mac selected when the device was added */
        struct vdpa_callback config_cb;
        struct notifier_block nb;
 };
index 965e325..a7612e0 100644 (file)
@@ -1247,44 +1247,41 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
        [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
        [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
        [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
+       [VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 },
        /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
        [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
+       [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 },
+       [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 },
 };
 
 static const struct genl_ops vdpa_nl_ops[] = {
        {
                .cmd = VDPA_CMD_MGMTDEV_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_mgmtdev_get_doit,
                .dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit,
        },
        {
                .cmd = VDPA_CMD_DEV_NEW,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_add_set_doit,
                .flags = GENL_ADMIN_PERM,
        },
        {
                .cmd = VDPA_CMD_DEV_DEL,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_del_set_doit,
                .flags = GENL_ADMIN_PERM,
        },
        {
                .cmd = VDPA_CMD_DEV_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_get_doit,
                .dumpit = vdpa_nl_cmd_dev_get_dumpit,
        },
        {
                .cmd = VDPA_CMD_DEV_CONFIG_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_config_get_doit,
                .dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
        },
        {
                .cmd = VDPA_CMD_DEV_VSTATS_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_stats_get_doit,
                .flags = GENL_ADMIN_PERM,
        },
index dc38ed2..df78695 100644 (file)
@@ -935,10 +935,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
 {
        struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
 
-       spin_lock_irq(&dev->irq_lock);
+       spin_lock_bh(&dev->irq_lock);
        if (dev->config_cb.callback)
                dev->config_cb.callback(dev->config_cb.private);
-       spin_unlock_irq(&dev->irq_lock);
+       spin_unlock_bh(&dev->irq_lock);
 }
 
 static void vduse_vq_irq_inject(struct work_struct *work)
@@ -946,10 +946,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
        struct vduse_virtqueue *vq = container_of(work,
                                        struct vduse_virtqueue, inject);
 
-       spin_lock_irq(&vq->irq_lock);
+       spin_lock_bh(&vq->irq_lock);
        if (vq->ready && vq->cb.callback)
                vq->cb.callback(vq->cb.private);
-       spin_unlock_irq(&vq->irq_lock);
+       spin_unlock_bh(&vq->irq_lock);
 }
 
 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
index c83f7f0..abef061 100644 (file)
@@ -25,6 +25,8 @@
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
 #include <linux/miscdevice.h>
+#include <linux/blk_types.h>
+#include <linux/bio.h>
 #include <asm/unaligned.h>
 #include <scsi/scsi_common.h>
 #include <scsi/scsi_proto.h>
@@ -75,6 +77,9 @@ struct vhost_scsi_cmd {
        u32 tvc_prot_sgl_count;
        /* Saved unpacked SCSI LUN for vhost_scsi_target_queue_cmd() */
        u32 tvc_lun;
+       u32 copied_iov:1;
+       const void *saved_iter_addr;
+       struct iov_iter saved_iter;
        /* Pointer to the SGL formatted memory from virtio-scsi */
        struct scatterlist *tvc_sgl;
        struct scatterlist *tvc_prot_sgl;
@@ -328,8 +333,13 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd)
        int i;
 
        if (tv_cmd->tvc_sgl_count) {
-               for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
-                       put_page(sg_page(&tv_cmd->tvc_sgl[i]));
+               for (i = 0; i < tv_cmd->tvc_sgl_count; i++) {
+                       if (tv_cmd->copied_iov)
+                               __free_page(sg_page(&tv_cmd->tvc_sgl[i]));
+                       else
+                               put_page(sg_page(&tv_cmd->tvc_sgl[i]));
+               }
+               kfree(tv_cmd->saved_iter_addr);
        }
        if (tv_cmd->tvc_prot_sgl_count) {
                for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++)
@@ -504,6 +514,28 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
        mutex_unlock(&vq->mutex);
 }
 
+static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd)
+{
+       struct iov_iter *iter = &cmd->saved_iter;
+       struct scatterlist *sg = cmd->tvc_sgl;
+       struct page *page;
+       size_t len;
+       int i;
+
+       for (i = 0; i < cmd->tvc_sgl_count; i++) {
+               page = sg_page(&sg[i]);
+               len = sg[i].length;
+
+               if (copy_page_to_iter(page, 0, len, iter) != len) {
+                       pr_err("Could not copy data while handling misaligned cmd. Error %zu\n",
+                              len);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
 /* Fill in status and signal that we are done processing this command
  *
  * This is scheduled in the vhost work queue so we are called with the owner
@@ -527,15 +559,20 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
 
                pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__,
                        cmd, se_cmd->residual_count, se_cmd->scsi_status);
-
                memset(&v_rsp, 0, sizeof(v_rsp));
-               v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count);
-               /* TODO is status_qualifier field needed? */
-               v_rsp.status = se_cmd->scsi_status;
-               v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
-                                                se_cmd->scsi_sense_length);
-               memcpy(v_rsp.sense, cmd->tvc_sense_buf,
-                      se_cmd->scsi_sense_length);
+
+               if (cmd->saved_iter_addr && vhost_scsi_copy_sgl_to_iov(cmd)) {
+                       v_rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
+               } else {
+                       v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq,
+                                                    se_cmd->residual_count);
+                       /* TODO is status_qualifier field needed? */
+                       v_rsp.status = se_cmd->scsi_status;
+                       v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
+                                                        se_cmd->scsi_sense_length);
+                       memcpy(v_rsp.sense, cmd->tvc_sense_buf,
+                              se_cmd->scsi_sense_length);
+               }
 
                iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iov,
                              cmd->tvc_in_iovs, sizeof(v_rsp));
@@ -613,12 +650,12 @@ static int
 vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
                      struct iov_iter *iter,
                      struct scatterlist *sgl,
-                     bool write)
+                     bool is_prot)
 {
        struct page **pages = cmd->tvc_upages;
        struct scatterlist *sg = sgl;
-       ssize_t bytes;
-       size_t offset;
+       ssize_t bytes, mapped_bytes;
+       size_t offset, mapped_offset;
        unsigned int npages = 0;
 
        bytes = iov_iter_get_pages2(iter, pages, LONG_MAX,
@@ -627,13 +664,53 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
        if (bytes <= 0)
                return bytes < 0 ? bytes : -EFAULT;
 
+       mapped_bytes = bytes;
+       mapped_offset = offset;
+
        while (bytes) {
                unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes);
+               /*
+                * The block layer requires bios/requests to be a multiple of
+                * 512 bytes, but Windows can send us vecs that are misaligned.
+                * This can result in bios and later requests with misaligned
+                * sizes if we have to break up a cmd/scatterlist into multiple
+                * bios.
+                *
+                * We currently only break up a command into multiple bios if
+                * we hit the vec/seg limit, so check if our sgl_count is
+                * greater than the max and if a vec in the cmd has a
+                * misaligned offset/size.
+                */
+               if (!is_prot &&
+                   (offset & (SECTOR_SIZE - 1) || n & (SECTOR_SIZE - 1)) &&
+                   cmd->tvc_sgl_count > BIO_MAX_VECS) {
+                       WARN_ONCE(true,
+                                 "vhost-scsi detected misaligned IO. Performance may be degraded.");
+                       goto revert_iter_get_pages;
+               }
+
                sg_set_page(sg++, pages[npages++], n, offset);
                bytes -= n;
                offset = 0;
        }
+
        return npages;
+
+revert_iter_get_pages:
+       iov_iter_revert(iter, mapped_bytes);
+
+       npages = 0;
+       while (mapped_bytes) {
+               unsigned int n = min_t(unsigned int, PAGE_SIZE - mapped_offset,
+                                      mapped_bytes);
+
+               put_page(pages[npages++]);
+
+               mapped_bytes -= n;
+               mapped_offset = 0;
+       }
+
+       return -EINVAL;
 }
 
 static int
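
The guard added in the hunk above rejects a data vec whose offset or length is not a multiple of the 512-byte sector size once the command would need more than BIO_MAX_VECS segments. The alignment test is the usual power-of-two mask trick; a tiny standalone demonstration (SECTOR_SIZE hard-coded to 512 here, matching the kernel's definition):

#include <stdbool.h>
#include <stdio.h>

#define SECTOR_SIZE	512u

static bool misaligned(unsigned long offset, unsigned long len)
{
	/* Non-zero low bits mean the value is not a multiple of 512. */
	return (offset & (SECTOR_SIZE - 1)) || (len & (SECTOR_SIZE - 1));
}

int main(void)
{
	printf("offset 0,   len 4096 -> %d\n", misaligned(0, 4096));	/* 0: aligned */
	printf("offset 10,  len 4096 -> %d\n", misaligned(10, 4096));	/* 1: misaligned */
	printf("offset 512, len 1000 -> %d\n", misaligned(512, 1000));	/* 1: misaligned */
	return 0;
}
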
@@ -657,25 +734,80 @@ vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls)
 }
 
 static int
-vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write,
-                     struct iov_iter *iter,
-                     struct scatterlist *sg, int sg_count)
+vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter,
+                          struct scatterlist *sg, int sg_count)
+{
+       size_t len = iov_iter_count(iter);
+       unsigned int nbytes = 0;
+       struct page *page;
+       int i;
+
+       if (cmd->tvc_data_direction == DMA_FROM_DEVICE) {
+               cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter,
+                                               GFP_KERNEL);
+               if (!cmd->saved_iter_addr)
+                       return -ENOMEM;
+       }
+
+       for (i = 0; i < sg_count; i++) {
+               page = alloc_page(GFP_KERNEL);
+               if (!page) {
+                       i--;
+                       goto err;
+               }
+
+               nbytes = min_t(unsigned int, PAGE_SIZE, len);
+               sg_set_page(&sg[i], page, nbytes, 0);
+
+               if (cmd->tvc_data_direction == DMA_TO_DEVICE &&
+                   copy_page_from_iter(page, 0, nbytes, iter) != nbytes)
+                       goto err;
+
+               len -= nbytes;
+       }
+
+       cmd->copied_iov = 1;
+       return 0;
+
+err:
+       pr_err("Could not read %u bytes while handling misaligned cmd\n",
+              nbytes);
+
+       for (; i >= 0; i--)
+               __free_page(sg_page(&sg[i]));
+       kfree(cmd->saved_iter_addr);
+       return -ENOMEM;
+}
+
+static int
+vhost_scsi_map_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter,
+                         struct scatterlist *sg, int sg_count, bool is_prot)
 {
        struct scatterlist *p = sg;
+       size_t revert_bytes;
        int ret;
 
        while (iov_iter_count(iter)) {
-               ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write);
+               ret = vhost_scsi_map_to_sgl(cmd, iter, sg, is_prot);
                if (ret < 0) {
+                       revert_bytes = 0;
+
                        while (p < sg) {
-                               struct page *page = sg_page(p++);
-                               if (page)
+                               struct page *page = sg_page(p);
+
+                               if (page) {
                                        put_page(page);
+                                       revert_bytes += p->length;
+                               }
+                               p++;
                        }
+
+                       iov_iter_revert(iter, revert_bytes);
                        return ret;
                }
                sg += ret;
        }
+
        return 0;
 }
 
@@ -685,7 +817,6 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
                 size_t data_bytes, struct iov_iter *data_iter)
 {
        int sgl_count, ret;
-       bool write = (cmd->tvc_data_direction == DMA_FROM_DEVICE);
 
        if (prot_bytes) {
                sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes,
@@ -698,9 +829,9 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
                pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__,
                         cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count);
 
-               ret = vhost_scsi_iov_to_sgl(cmd, write, prot_iter,
-                                           cmd->tvc_prot_sgl,
-                                           cmd->tvc_prot_sgl_count);
+               ret = vhost_scsi_map_iov_to_sgl(cmd, prot_iter,
+                                               cmd->tvc_prot_sgl,
+                                               cmd->tvc_prot_sgl_count, true);
                if (ret < 0) {
                        cmd->tvc_prot_sgl_count = 0;
                        return ret;
@@ -716,8 +847,14 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
        pr_debug("%s data_sg %p data_sgl_count %u\n", __func__,
                  cmd->tvc_sgl, cmd->tvc_sgl_count);
 
-       ret = vhost_scsi_iov_to_sgl(cmd, write, data_iter,
-                                   cmd->tvc_sgl, cmd->tvc_sgl_count);
+       ret = vhost_scsi_map_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl,
+                                       cmd->tvc_sgl_count, false);
+       if (ret == -EINVAL) {
+               sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count);
+               ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl,
+                                                cmd->tvc_sgl_count);
+       }
+
        if (ret < 0) {
                cmd->tvc_sgl_count = 0;
                return ret;
index d88265d..f216b2c 100644 (file)
@@ -687,7 +687,7 @@ struct fb_var_cursorinfo {
        __u16 height;
        __u16 xspot;
        __u16 yspot;
-       __u8 data[1];                   /* field with [height][width]        */
+       DECLARE_FLEX_ARRAY(__u8, data); /* field with [height][width]        */
 };
 
 struct fb_cursorstate {
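
DECLARE_FLEX_ARRAY() turns the old one-element-array hack into a proper C99 flexible array member, so sizeof(struct fb_var_cursorinfo) no longer silently counts one byte of cursor data and array-bounds checking sees the real intent; the kernel macro itself mainly exists so UAPI headers keep building in contexts where a bare flexible array member is awkward (an assumption here, not something stated in the patch). A minimal userspace illustration of the difference, using a plain flexible array member:

#include <stdio.h>
#include <stdlib.h>

struct old_style { unsigned short width; unsigned char data[1]; };	/* [1] hack */
struct new_style { unsigned short width; unsigned char data[]; };	/* flexible array member */

int main(void)
{
	/* The [1] hack contributes one element to sizeof; the FAM contributes nothing. */
	printf("old: %zu bytes, new: %zu bytes\n",
	       sizeof(struct old_style), sizeof(struct new_style));

	/* With a FAM, space for the trailing bytes is requested explicitly. */
	size_t n = 16;
	struct new_style *c = malloc(sizeof(*c) + n);
	if (!c)
		return 1;
	c->width = (unsigned short)n;
	free(c);
	return 0;
}
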
index 987c5f5..f245da1 100644 (file)
@@ -1308,7 +1308,7 @@ static struct platform_driver atmel_lcdfb_driver = {
        .resume         = atmel_lcdfb_resume,
        .driver         = {
                .name   = "atmel_lcdfb",
-               .of_match_table = of_match_ptr(atmel_lcdfb_dt_ids),
+               .of_match_table = atmel_lcdfb_dt_ids,
        },
 };
 
index 6fa2108..e41c9fe 100644 (file)
@@ -203,8 +203,8 @@ static int goldfish_fb_probe(struct platform_device *pdev)
        }
 
        fb->irq = platform_get_irq(pdev, 0);
-       if (fb->irq <= 0) {
-               ret = -ENODEV;
+       if (fb->irq < 0) {
+               ret = fb->irq;
                goto err_no_irq;
        }
 
index 51fbf02..76b50b6 100644 (file)
@@ -519,7 +519,9 @@ static int mmphw_probe(struct platform_device *pdev)
                              "unable to get clk %s\n", mi->clk_name);
                goto failed;
        }
-       clk_prepare_enable(ctrl->clk);
+       ret = clk_prepare_enable(ctrl->clk);
+       if (ret)
+               goto failed;
 
        /* init global regs */
        ctrl_set_default(ctrl);
index 11c3737..46881a6 100644 (file)
@@ -399,8 +399,8 @@ static int ssd1307fb_init(struct ssd1307fb_par *par)
                /* Enable the PWM */
                pwm_enable(par->pwm);
 
-               dev_dbg(&par->client->dev, "Using PWM%d with a %lluns period.\n",
-                       par->pwm->pwm, pwm_get_period(par->pwm));
+               dev_dbg(&par->client->dev, "Using PWM %s with a %lluns period.\n",
+                       par->pwm->label, pwm_get_period(par->pwm));
        }
 
        /* Set initial contrast */
index 835f6cc..fa5226c 100644 (file)
@@ -38,11 +38,6 @@ module_param(bbm_block_size, ulong, 0444);
 MODULE_PARM_DESC(bbm_block_size,
                 "Big Block size in bytes. Default is 0 (auto-detection).");
 
-static bool bbm_safe_unplug = true;
-module_param(bbm_safe_unplug, bool, 0444);
-MODULE_PARM_DESC(bbm_safe_unplug,
-            "Use a safe unplug mechanism in BBM, avoiding long/endless loops");
-
 /*
  * virtio-mem currently supports the following modes of operation:
  *
@@ -173,6 +168,13 @@ struct virtio_mem {
                        /* The number of subblocks per Linux memory block. */
                        uint32_t sbs_per_mb;
 
+                       /*
+                        * Some of the Linux memory blocks tracked as "partially
+                        * plugged" are completely unplugged and can be offlined
+                        * and removed -- which previously failed.
+                        */
+                       bool have_unplugged_mb;
+
                        /* Summary of all memory block states. */
                        unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT];
 
@@ -746,11 +748,15 @@ static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm,
                 * immediately instead of waiting.
                 */
                virtio_mem_retry(vm);
-       } else {
-               dev_dbg(&vm->vdev->dev,
-                       "offlining and removing memory failed: %d\n", rc);
+               return 0;
        }
-       return rc;
+       dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc);
+       /*
+        * We don't really expect this to fail, because we fake-offlined all
+        * memory already. But it could fail in corner cases.
+        */
+       WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY);
+       return rc == -ENOMEM ? -ENOMEM : -EBUSY;
 }
 
 /*
@@ -767,6 +773,34 @@ static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm,
 }
 
 /*
+ * Try (offlining and) removing memory from Linux in case all subblocks are
+ * unplugged. Can be called on online and offline memory blocks.
+ *
+ * May modify the state of memory blocks in virtio-mem.
+ */
+static int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm,
+                                                 unsigned long mb_id)
+{
+       int rc;
+
+       /*
+        * Once all subblocks of a memory block were unplugged, offline and
+        * remove it.
+        */
+       if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
+               return 0;
+
+       /* offline_and_remove_memory() works for online and offline memory. */
+       mutex_unlock(&vm->hotplug_mutex);
+       rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
+       mutex_lock(&vm->hotplug_mutex);
+       if (!rc)
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                           VIRTIO_MEM_SBM_MB_UNUSED);
+       return rc;
+}
+
+/*
  * See virtio_mem_offline_and_remove_memory(): Try to offline and remove
  * all Linux memory blocks covered by the big block.
  */
@@ -1155,7 +1189,8 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
  * Try to allocate a range, marking pages fake-offline, effectively
  * fake-offlining them.
  */
-static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
+static int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn,
+                                  unsigned long nr_pages)
 {
        const bool is_movable = is_zone_movable_page(pfn_to_page(pfn));
        int rc, retry_count;
@@ -1168,6 +1203,14 @@ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
         * some guarantees.
         */
        for (retry_count = 0; retry_count < 5; retry_count++) {
+               /*
+                * If the config changed, stop immediately and go back to the
+                * main loop: avoid trying to keep unplugging if the device
+                * might have decided to not remove any more memory.
+                */
+               if (atomic_read(&vm->config_changed))
+                       return -EAGAIN;
+
                rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE,
                                        GFP_KERNEL);
                if (rc == -ENOMEM)
@@ -1917,7 +1960,7 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
        start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
                             sb_id * vm->sbm.sb_size);
 
-       rc = virtio_mem_fake_offline(start_pfn, nr_pages);
+       rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages);
        if (rc)
                return rc;
 
@@ -1989,20 +2032,10 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm,
        }
 
 unplugged:
-       /*
-        * Once all subblocks of a memory block were unplugged, offline and
-        * remove it. This will usually not fail, as no memory is in use
-        * anymore - however some other notifiers might NACK the request.
-        */
-       if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
-               mutex_unlock(&vm->hotplug_mutex);
-               rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
-               mutex_lock(&vm->hotplug_mutex);
-               if (!rc)
-                       virtio_mem_sbm_set_mb_state(vm, mb_id,
-                                                   VIRTIO_MEM_SBM_MB_UNUSED);
-       }
-
+       rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id);
+       if (rc)
+               vm->sbm.have_unplugged_mb = 1;
+       /* Ignore errors, this is not critical. We'll retry later. */
        return 0;
 }
 
@@ -2111,38 +2144,32 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
                         VIRTIO_MEM_BBM_BB_ADDED))
                return -EINVAL;
 
-       if (bbm_safe_unplug) {
-               /*
-                * Start by fake-offlining all memory. Once we marked the device
-                * block as fake-offline, all newly onlined memory will
-                * automatically be kept fake-offline. Protect from concurrent
-                * onlining/offlining until we have a consistent state.
-                */
-               mutex_lock(&vm->hotplug_mutex);
-               virtio_mem_bbm_set_bb_state(vm, bb_id,
-                                           VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
+       /*
+        * Start by fake-offlining all memory. Once we marked the device
+        * block as fake-offline, all newly onlined memory will
+        * automatically be kept fake-offline. Protect from concurrent
+        * onlining/offlining until we have a consistent state.
+        */
+       mutex_lock(&vm->hotplug_mutex);
+       virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
 
-               for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-                       page = pfn_to_online_page(pfn);
-                       if (!page)
-                               continue;
+       for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+               page = pfn_to_online_page(pfn);
+               if (!page)
+                       continue;
 
-                       rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
-                       if (rc) {
-                               end_pfn = pfn;
-                               goto rollback_safe_unplug;
-                       }
+               rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION);
+               if (rc) {
+                       end_pfn = pfn;
+                       goto rollback;
                }
-               mutex_unlock(&vm->hotplug_mutex);
        }
+       mutex_unlock(&vm->hotplug_mutex);
 
        rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
        if (rc) {
-               if (bbm_safe_unplug) {
-                       mutex_lock(&vm->hotplug_mutex);
-                       goto rollback_safe_unplug;
-               }
-               return rc;
+               mutex_lock(&vm->hotplug_mutex);
+               goto rollback;
        }
 
        rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
@@ -2154,7 +2181,7 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
                                            VIRTIO_MEM_BBM_BB_UNUSED);
        return rc;
 
-rollback_safe_unplug:
+rollback:
        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
                page = pfn_to_online_page(pfn);
                if (!page)
@@ -2260,12 +2287,13 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
 
 /*
  * Try to unplug all blocks that couldn't be unplugged before, for example,
- * because the hypervisor was busy.
+ * because the hypervisor was busy. Further, offline and remove any memory
+ * blocks where we previously failed.
  */
-static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
+static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm)
 {
        unsigned long id;
-       int rc;
+       int rc = 0;
 
        if (!vm->in_sbm) {
                virtio_mem_bbm_for_each_bb(vm, id,
@@ -2287,6 +2315,27 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
                                            VIRTIO_MEM_SBM_MB_UNUSED);
        }
 
+       if (!vm->sbm.have_unplugged_mb)
+               return 0;
+
+       /*
+        * Let's retry (offlining and) removing completely unplugged Linux
+        * memory blocks.
+        */
+       vm->sbm.have_unplugged_mb = false;
+
+       mutex_lock(&vm->hotplug_mutex);
+       virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL)
+               rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
+       virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL)
+               rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
+       virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
+               rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
+       mutex_unlock(&vm->hotplug_mutex);
+
+       if (rc)
+               vm->sbm.have_unplugged_mb = true;
+       /* Ignore errors, this is not critical. We'll retry later. */
        return 0;
 }
 
@@ -2368,9 +2417,9 @@ retry:
                virtio_mem_refresh_config(vm);
        }
 
-       /* Unplug any leftovers from previous runs */
+       /* Cleanup any leftovers from previous runs */
        if (!rc)
-               rc = virtio_mem_unplug_pending_mb(vm);
+               rc = virtio_mem_cleanup_pending_mb(vm);
 
        if (!rc && vm->requested_size != vm->plugged_size) {
                if (vm->requested_size > vm->plugged_size) {
@@ -2382,6 +2431,13 @@ retry:
                }
        }
 
+       /*
+        * Keep retrying to offline and remove completely unplugged Linux
+        * memory blocks.
+        */
+       if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb)
+               rc = -EBUSY;
+
        switch (rc) {
        case 0:
                vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
index a46a4a2..97760f6 100644 (file)
@@ -607,9 +607,8 @@ static void virtio_mmio_release_dev(struct device *_d)
        struct virtio_device *vdev =
                        container_of(_d, struct virtio_device, dev);
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
-       struct platform_device *pdev = vm_dev->pdev;
 
-       devm_kfree(&pdev->dev, vm_dev);
+       kfree(vm_dev);
 }
 
 /* Platform device */
@@ -620,7 +619,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
        unsigned long magic;
        int rc;
 
-       vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
+       vm_dev = kzalloc(sizeof(*vm_dev), GFP_KERNEL);
        if (!vm_dev)
                return -ENOMEM;
 
index a6c86f9..c2524a7 100644 (file)
@@ -557,8 +557,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
 
        pci_set_master(pci_dev);
 
-       vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false;
-
        rc = register_virtio_device(&vp_dev->vdev);
        reg_dev = vp_dev;
        if (rc)
index 2257f1b..d9cbb02 100644 (file)
@@ -223,6 +223,7 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
        vp_dev->config_vector = vp_config_vector;
        vp_dev->setup_vq = setup_vq;
        vp_dev->del_vq = del_vq;
+       vp_dev->is_legacy = true;
 
        return 0;
 }
index 989e2d7..961161d 100644 (file)
@@ -393,11 +393,13 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
        cb.callback = virtio_vdpa_config_cb;
        cb.private = vd_dev;
        ops->set_config_cb(vdpa, &cb);
+       kfree(masks);
 
        return 0;
 
 err_setup_vq:
        virtio_vdpa_del_vqs(vdev);
+       kfree(masks);
        return err;
 }
 
index 030ab44..82324c3 100644 (file)
@@ -441,13 +441,23 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
                                           u64 num_bytes)
 {
        struct btrfs_caching_control *caching_ctl;
+       int progress;
 
        caching_ctl = btrfs_get_caching_control(cache);
        if (!caching_ctl)
                return;
 
+       /*
+        * We've already failed to allocate from this block group, so even if
+        * there's enough space in the block group it isn't contiguous enough to
+        * allow for an allocation, so wait for at least the next wakeup tick,
+        * or for the caching of this block group to finish.
+        */
+       progress = atomic_read(&caching_ctl->progress);
+
        wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
-                  (cache->free_space_ctl->free_space >= num_bytes));
+                  (progress != atomic_read(&caching_ctl->progress) &&
+                   (cache->free_space_ctl->free_space >= num_bytes)));
 
        btrfs_put_caching_control(caching_ctl);
 }
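
The rewritten wait condition snapshots the caching progress counter before sleeping and only trusts the free-space number again after the caching thread has bumped the counter (or the block group finished caching), so an allocator that already failed once cannot busy-wake on stale free-space figures. A compact userspace analogue of the snapshot-then-wait predicate, using C11 atomics in place of the kernel's wait_event() machinery:

#include <stdatomic.h>
#include <stdbool.h>

struct caching_ctl {
	atomic_int progress;	/* bumped by the caching thread on every wake-up tick */
	atomic_bool done;	/* set once the whole block group is cached */
};

/* Sketch of the condition evaluated each time the waiter is woken. */
static bool should_stop_waiting(struct caching_ctl *ctl, int snapshot,
				unsigned long long free_space,
				unsigned long long num_bytes)
{
	if (atomic_load(&ctl->done))
		return true;
	/* Only trust free_space once at least one more caching tick has happened. */
	return atomic_load(&ctl->progress) != snapshot && free_space >= num_bytes;
}

int main(void)
{
	struct caching_ctl ctl = { .progress = 0, .done = false };
	int snap = atomic_load(&ctl.progress);

	/* No progress yet: ample free space alone does not end the wait. */
	if (should_stop_waiting(&ctl, snap, 1 << 20, 4096))
		return 1;

	atomic_fetch_add(&ctl.progress, 1);	/* caching thread made progress */
	return should_stop_waiting(&ctl, snap, 1 << 20, 4096) ? 0 : 1;
}
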
@@ -802,8 +812,10 @@ next:
 
                        if (total_found > CACHING_CTL_WAKE_UP) {
                                total_found = 0;
-                               if (wakeup)
+                               if (wakeup) {
+                                       atomic_inc(&caching_ctl->progress);
                                        wake_up(&caching_ctl->wait);
+                               }
                        }
                }
                path->slots[0]++;
@@ -910,6 +922,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
        init_waitqueue_head(&caching_ctl->wait);
        caching_ctl->block_group = cache;
        refcount_set(&caching_ctl->count, 2);
+       atomic_set(&caching_ctl->progress, 0);
        btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
 
        spin_lock(&cache->lock);
index aba5dff..74b61e6 100644 (file)
@@ -90,6 +90,8 @@ struct btrfs_caching_control {
        wait_queue_head_t wait;
        struct btrfs_work work;
        struct btrfs_block_group *block_group;
+       /* Track progress of caching during allocation. */
+       atomic_t progress;
        refcount_t count;
 };
 
index f2d2b31..9419f4e 100644 (file)
@@ -443,6 +443,7 @@ struct btrfs_drop_extents_args {
 
 struct btrfs_file_private {
        void *filldir_buf;
+       u64 last_index;
        struct extent_state *llseek_cached_state;
 };
 
index 6b457b0..6d51db0 100644 (file)
@@ -1632,6 +1632,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
 }
 
 bool btrfs_readdir_get_delayed_items(struct inode *inode,
+                                    u64 last_index,
                                     struct list_head *ins_list,
                                     struct list_head *del_list)
 {
@@ -1651,14 +1652,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
 
        mutex_lock(&delayed_node->mutex);
        item = __btrfs_first_delayed_insertion_item(delayed_node);
-       while (item) {
+       while (item && item->index <= last_index) {
                refcount_inc(&item->refs);
                list_add_tail(&item->readdir_list, ins_list);
                item = __btrfs_next_delayed_item(item);
        }
 
        item = __btrfs_first_delayed_deletion_item(delayed_node);
-       while (item) {
+       while (item && item->index <= last_index) {
                refcount_inc(&item->refs);
                list_add_tail(&item->readdir_list, del_list);
                item = __btrfs_next_delayed_item(item);
index 4f21daa..dc1085b 100644 (file)
@@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info);
 
 /* Used for readdir() */
 bool btrfs_readdir_get_delayed_items(struct inode *inode,
+                                    u64 last_index,
                                     struct list_head *ins_list,
                                     struct list_head *del_list);
 void btrfs_readdir_put_delayed_items(struct inode *inode,
index 9b9914e..a9a2c54 100644 (file)
@@ -1103,7 +1103,8 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
        btrfs_drew_lock_init(&root->snapshot_lock);
 
        if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
-           !btrfs_is_data_reloc_root(root)) {
+           !btrfs_is_data_reloc_root(root) &&
+           is_fstree(root->root_key.objectid)) {
                set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
                btrfs_check_and_init_root_item(&root->root_item);
        }
@@ -1300,6 +1301,16 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
        root = btrfs_get_global_root(fs_info, objectid);
        if (root)
                return root;
+
+       /*
+        * If we're called for non-subvolume trees, and above function didn't
+        * find one, do not try to read it from disk.
+        *
+        * This is namely for free-space-tree and quota tree, which can change
+        * at runtime and should only be grabbed from fs_info.
+        */
+       if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+               return ERR_PTR(-ENOENT);
 again:
        root = btrfs_lookup_fs_root(fs_info, objectid);
        if (root) {
index 911908e..f396a9a 100644 (file)
@@ -4310,8 +4310,11 @@ have_block_group:
                        ret = 0;
                }
 
-               if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
+               if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
+                       if (!cache_block_group_error)
+                               cache_block_group_error = -EIO;
                        goto loop;
+               }
 
                if (!find_free_extent_check_size_class(ffe_ctl, block_group))
                        goto loop;
index a91d5ad..90ad300 100644 (file)
@@ -902,7 +902,30 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
                size -= len;
                pg_offset += len;
                disk_bytenr += len;
-               bio_ctrl->len_to_oe_boundary -= len;
+
+               /*
+                * len_to_oe_boundary defaults to U32_MAX, which isn't page or
+                * sector aligned.  alloc_new_bio() then sets it to the end of
+                * our ordered extent for writes into zoned devices.
+                *
+                * When len_to_oe_boundary is tracking an ordered extent, we
+                * trust the ordered extent code to align things properly, and
+                * the check above to cap our write to the ordered extent
+                * boundary is correct.
+                *
+                * When len_to_oe_boundary is U32_MAX, the cap above would
+                * result in a 4095 byte IO for the last page right before
+                * we hit the bio limit of UINT_MAX.  bio_add_page() has all
+                * the checks required to make sure we don't overflow the bio,
+                * and we should just ignore len_to_oe_boundary completely
+                * unless we're using it to track an ordered extent.
+                *
+                * It's pretty hard to make a bio sized U32_MAX, but it can
+                * happen when the page cache is able to feed us contiguous
+                * pages for large extents.
+                */
+               if (bio_ctrl->len_to_oe_boundary != U32_MAX)
+                       bio_ctrl->len_to_oe_boundary -= len;
 
                /* Ordered extent boundary: move on to a new bio. */
                if (bio_ctrl->len_to_oe_boundary == 0)
@@ -2145,6 +2168,12 @@ retry:
                                continue;
                        }
 
+                       if (!folio_test_dirty(folio)) {
+                               /* Someone wrote it for us. */
+                               folio_unlock(folio);
+                               continue;
+                       }
+
                        if (wbc->sync_mode != WB_SYNC_NONE) {
                                if (folio_test_writeback(folio))
                                        submit_write_bio(bio_ctrl, 0);
@@ -2164,11 +2193,12 @@ retry:
                        }
 
                        /*
-                        * the filesystem may choose to bump up nr_to_write.
+                        * The filesystem may choose to bump up nr_to_write.
                         * We have to make sure to honor the new nr_to_write
-                        * at any time
+                        * at any time.
                         */
-                       nr_to_write_done = wbc->nr_to_write <= 0;
+                       nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
+                                           wbc->nr_to_write <= 0);
                }
                folio_batch_release(&fbatch);
                cond_resched();
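The len_to_oe_boundary comment in the hunk above boils down to a sentinel pattern: U32_MAX means "no ordered-extent boundary is being tracked", so the counter must be left alone rather than decremented toward an artificial split. A minimal, self-contained sketch of that pattern follows; every name except the len_to_oe_boundary/U32_MAX idea is hypothetical and only illustrates the shape of the check.

	#include <stdint.h>
	#include <stdbool.h>

	#define NO_BOUNDARY UINT32_MAX              /* sentinel: no boundary tracked */

	struct io_ctrl {
		uint32_t len_to_boundary;           /* bytes left to boundary, or NO_BOUNDARY */
	};

	/* Account one chunk; return true when the caller must start a new I/O. */
	static bool account_chunk(struct io_ctrl *ctrl, uint32_t len)
	{
		if (ctrl->len_to_boundary == NO_BOUNDARY)
			return false;               /* sentinel value: never decrement */

		ctrl->len_to_boundary -= len;       /* caller already capped len to the boundary */
		return ctrl->len_to_boundary == 0;  /* boundary hit: split here */
	}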
index 0cdb3e8..a6d8368 100644 (file)
@@ -760,8 +760,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
 
                if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
                        start = em_end;
-                       if (end != (u64)-1)
-                               len = start + len - em_end;
                        goto next;
                }
 
@@ -829,8 +827,8 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
                                if (!split)
                                        goto remove_em;
                        }
-                       split->start = start + len;
-                       split->len = em_end - (start + len);
+                       split->start = end;
+                       split->len = em_end - end;
                        split->block_start = em->block_start;
                        split->flags = flags;
                        split->compress_type = em->compress_type;
index 49cef61..aa090b0 100644 (file)
@@ -1654,8 +1654,6 @@ out_unlock:
                                             clear_bits,
                                             page_ops);
                start += cur_alloc_size;
-               if (start >= end)
-                       return ret;
        }
 
        /*
@@ -1664,9 +1662,11 @@ out_unlock:
         * space_info's bytes_may_use counter, reserved in
         * btrfs_check_data_free_space().
         */
-       extent_clear_unlock_delalloc(inode, start, end, locked_page,
-                                    clear_bits | EXTENT_CLEAR_DATA_RESV,
-                                    page_ops);
+       if (start < end) {
+               clear_bits |= EXTENT_CLEAR_DATA_RESV;
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            clear_bits, page_ops);
+       }
        return ret;
 }
 
@@ -5873,6 +5873,74 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 }
 
 /*
+ * Find the highest existing sequence number in a directory and then set the
+ * in-memory index_cnt variable to the first free sequence number.
+ */
+static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_key key, found_key;
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       int ret;
+
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_DIR_INDEX_KEY;
+       key.offset = (u64)-1;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       /* FIXME: we should be able to handle this */
+       if (ret == 0)
+               goto out;
+       ret = 0;
+
+       if (path->slots[0] == 0) {
+               inode->index_cnt = BTRFS_DIR_START_INDEX;
+               goto out;
+       }
+
+       path->slots[0]--;
+
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+       if (found_key.objectid != btrfs_ino(inode) ||
+           found_key.type != BTRFS_DIR_INDEX_KEY) {
+               inode->index_cnt = BTRFS_DIR_START_INDEX;
+               goto out;
+       }
+
+       inode->index_cnt = found_key.offset + 1;
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+{
+       if (dir->index_cnt == (u64)-1) {
+               int ret;
+
+               ret = btrfs_inode_delayed_dir_index_count(dir);
+               if (ret) {
+                       ret = btrfs_set_inode_index_count(dir);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       *index = dir->index_cnt;
+
+       return 0;
+}
+
+/*
  * All this infrastructure exists because dir_emit can fault, and we are holding
  * the tree lock when doing readdir.  For now just allocate a buffer and copy
  * our information into that, and then dir_emit from the buffer.  This is
@@ -5884,10 +5952,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 static int btrfs_opendir(struct inode *inode, struct file *file)
 {
        struct btrfs_file_private *private;
+       u64 last_index;
+       int ret;
+
+       ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
+       if (ret)
+               return ret;
 
        private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
        if (!private)
                return -ENOMEM;
+       private->last_index = last_index;
        private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
        if (!private->filldir_buf) {
                kfree(private);
@@ -5954,7 +6029,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 
        INIT_LIST_HEAD(&ins_list);
        INIT_LIST_HEAD(&del_list);
-       put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+       put = btrfs_readdir_get_delayed_items(inode, private->last_index,
+                                             &ins_list, &del_list);
 
 again:
        key.type = BTRFS_DIR_INDEX_KEY;
@@ -5972,6 +6048,8 @@ again:
                        break;
                if (found_key.offset < ctx->pos)
                        continue;
+               if (found_key.offset > private->last_index)
+                       break;
                if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
                        continue;
                di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
@@ -6108,57 +6186,6 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
 }
 
 /*
- * find the highest existing sequence number in a directory
- * and then set the in-memory index_cnt variable to reflect
- * free sequence numbers
- */
-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
-{
-       struct btrfs_root *root = inode->root;
-       struct btrfs_key key, found_key;
-       struct btrfs_path *path;
-       struct extent_buffer *leaf;
-       int ret;
-
-       key.objectid = btrfs_ino(inode);
-       key.type = BTRFS_DIR_INDEX_KEY;
-       key.offset = (u64)-1;
-
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-       if (ret < 0)
-               goto out;
-       /* FIXME: we should be able to handle this */
-       if (ret == 0)
-               goto out;
-       ret = 0;
-
-       if (path->slots[0] == 0) {
-               inode->index_cnt = BTRFS_DIR_START_INDEX;
-               goto out;
-       }
-
-       path->slots[0]--;
-
-       leaf = path->nodes[0];
-       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
-       if (found_key.objectid != btrfs_ino(inode) ||
-           found_key.type != BTRFS_DIR_INDEX_KEY) {
-               inode->index_cnt = BTRFS_DIR_START_INDEX;
-               goto out;
-       }
-
-       inode->index_cnt = found_key.offset + 1;
-out:
-       btrfs_free_path(path);
-       return ret;
-}
-
-/*
  * helper to find a free sequence number in a given directory.  This current
  * code is very simple, later versions will do smarter things in the btree
  */
index 25a3361..46c3c1d 100644 (file)
@@ -1916,7 +1916,39 @@ again:
                                err = PTR_ERR(root);
                        break;
                }
-               ASSERT(root->reloc_root == reloc_root);
+
+               if (unlikely(root->reloc_root != reloc_root)) {
+                       if (root->reloc_root) {
+                               btrfs_err(fs_info,
+"reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu",
+                                         root->root_key.objectid,
+                                         root->reloc_root->root_key.objectid,
+                                         root->reloc_root->root_key.type,
+                                         root->reloc_root->root_key.offset,
+                                         btrfs_root_generation(
+                                                 &root->reloc_root->root_item),
+                                         reloc_root->root_key.objectid,
+                                         reloc_root->root_key.type,
+                                         reloc_root->root_key.offset,
+                                         btrfs_root_generation(
+                                                 &reloc_root->root_item));
+                       } else {
+                               btrfs_err(fs_info,
+"reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu",
+                                         root->root_key.objectid,
+                                         reloc_root->root_key.objectid,
+                                         reloc_root->root_key.type,
+                                         reloc_root->root_key.offset,
+                                         btrfs_root_generation(
+                                                 &reloc_root->root_item));
+                       }
+                       list_add(&reloc_root->root_list, &reloc_roots);
+                       btrfs_put_root(root);
+                       btrfs_abort_transaction(trans, -EUCLEAN);
+                       if (!err)
+                               err = -EUCLEAN;
+                       break;
+               }
 
                /*
                 * set reference count to 1, so btrfs_recover_relocation
@@ -1989,7 +2021,7 @@ again:
                root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
                                         false);
                if (btrfs_root_refs(&reloc_root->root_item) > 0) {
-                       if (IS_ERR(root)) {
+                       if (WARN_ON(IS_ERR(root))) {
                                /*
                                 * For recovery we read the fs roots on mount,
                                 * and if we didn't find the root then we marked
@@ -1998,17 +2030,14 @@ again:
                                 * memory.  However there's no reason we can't
                                 * handle the error properly here just in case.
                                 */
-                               ASSERT(0);
                                ret = PTR_ERR(root);
                                goto out;
                        }
-                       if (root->reloc_root != reloc_root) {
+                       if (WARN_ON(root->reloc_root != reloc_root)) {
                                /*
-                                * This is actually impossible without something
-                                * going really wrong (like weird race condition
-                                * or cosmic rays).
+                                * This can happen if on-disk metadata has some
+                                * corruption, e.g. bad reloc tree key offset.
                                 */
-                               ASSERT(0);
                                ret = -EINVAL;
                                goto out;
                        }
index 4cae41b..7289f5b 100644 (file)
@@ -605,7 +605,8 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
                              btrfs_stack_header_bytenr(header), logical);
                return;
        }
-       if (memcmp(header->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE) != 0) {
+       if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid,
+                  BTRFS_FSID_SIZE) != 0) {
                bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
                bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
                btrfs_warn_rl(fs_info,
index 038dfa8..ab08a0b 100644 (file)
@@ -446,6 +446,20 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
        btrfs_item_key_to_cpu(leaf, &item_key, slot);
        is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
 
+       /*
+        * Bad rootid for reloc trees.
+        *
+        * Reloc trees are only for subvolume trees, other trees only need
+        * to be COWed to be relocated.
+        */
+       if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
+                    !is_fstree(key->offset))) {
+               generic_err(leaf, slot,
+               "invalid reloc tree for root %lld, root id is not a subvolume tree",
+                           key->offset);
+               return -EUCLEAN;
+       }
+
        /* No such tree id */
        if (unlikely(key->objectid == 0)) {
                if (is_root_item)
index 2ecb76c..6aa9bf3 100644 (file)
@@ -4638,8 +4638,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
                }
        }
 
-       BUG_ON(fs_info->balance_ctl ||
-               test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+       ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
        atomic_dec(&fs_info->balance_cancel_req);
        mutex_unlock(&fs_info->balance_mutex);
        return 0;
index 1bf3c44..b43fa8b 100644 (file)
@@ -1578,7 +1578,7 @@ const struct file_operations gfs2_file_fops = {
        .fsync          = gfs2_fsync,
        .lock           = gfs2_lock,
        .flock          = gfs2_flock,
-       .splice_read    = filemap_splice_read,
+       .splice_read    = copy_splice_read,
        .splice_write   = gfs2_file_splice_write,
        .setlease       = simple_nosetlease,
        .fallocate      = gfs2_fallocate,
@@ -1609,7 +1609,7 @@ const struct file_operations gfs2_file_fops_nolock = {
        .open           = gfs2_open,
        .release        = gfs2_release,
        .fsync          = gfs2_fsync,
-       .splice_read    = filemap_splice_read,
+       .splice_read    = copy_splice_read,
        .splice_write   = gfs2_file_splice_write,
        .setlease       = generic_setlease,
        .fallocate      = gfs2_fallocate,
index ec16312..7e835be 100644 (file)
@@ -230,9 +230,11 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
 {
 
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+       struct super_block *sb = sdp->sd_vfs;
        struct gfs2_bufdata *bd;
        struct gfs2_meta_header *mh;
        struct gfs2_trans *tr = current->journal_info;
+       bool withdraw = false;
 
        lock_buffer(bh);
        if (buffer_pinned(bh)) {
@@ -266,13 +268,15 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
                       (unsigned long long)bd->bd_bh->b_blocknr);
                BUG();
        }
-       if (unlikely(test_bit(SDF_FROZEN, &sdp->sd_flags))) {
-               fs_info(sdp, "GFS2:adding buf while frozen\n");
-               gfs2_assert_withdraw(sdp, 0);
-       }
        if (unlikely(gfs2_withdrawn(sdp))) {
                fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n",
                        (unsigned long long)bd->bd_bh->b_blocknr);
+               goto out_unlock;
+       }
+       if (unlikely(sb->s_writers.frozen == SB_FREEZE_COMPLETE)) {
+               fs_info(sdp, "GFS2:adding buf while frozen\n");
+               withdraw = true;
+               goto out_unlock;
        }
        gfs2_pin(sdp, bd->bd_bh);
        mh->__pad0 = cpu_to_be64(0);
@@ -281,6 +285,8 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
        tr->tr_num_buf_new++;
 out_unlock:
        gfs2_log_unlock(sdp);
+       if (withdraw)
+               gfs2_assert_withdraw(sdp, 0);
 out:
        unlock_buffer(bh);
 }
index 8fefb69..67611a3 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/posix_acl.h>
-#include <linux/prefetch.h>
 #include <linux/buffer_head.h> /* for inode_has_buffers */
 #include <linux/ratelimit.h>
 #include <linux/list_lru.h>
@@ -1041,8 +1040,6 @@ struct inode *new_inode(struct super_block *sb)
 {
        struct inode *inode;
 
-       spin_lock_prefetch(&sb->s_inode_list_lock);
-
        inode = new_inode_pseudo(sb);
        if (inode)
                inode_sb_list_add(inode);
index 9a18c5a..aaffaaa 100644 (file)
@@ -472,20 +472,26 @@ out:
        return result;
 }
 
-static void
-nfs_direct_join_group(struct list_head *list, struct inode *inode)
+static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
 {
-       struct nfs_page *req, *next;
+       struct nfs_page *req, *subreq;
 
        list_for_each_entry(req, list, wb_list) {
-               if (req->wb_head != req || req->wb_this_page == req)
+               if (req->wb_head != req)
                        continue;
-               for (next = req->wb_this_page;
-                               next != req->wb_head;
-                               next = next->wb_this_page) {
-                       nfs_list_remove_request(next);
-                       nfs_release_request(next);
-               }
+               subreq = req->wb_this_page;
+               if (subreq == req)
+                       continue;
+               do {
+                       /*
+                        * Remove subrequests from this list before freeing
+                        * them in the call to nfs_join_page_group().
+                        */
+                       if (!list_empty(&subreq->wb_list)) {
+                               nfs_list_remove_request(subreq);
+                               nfs_release_request(subreq);
+                       }
+               } while ((subreq = subreq->wb_this_page) != req);
                nfs_join_page_group(req, inode);
        }
 }
index 63802d1..49f78e2 100644 (file)
@@ -1377,7 +1377,6 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
        for (i = 0; i < np; i++) {
                pages[i] = alloc_page(GFP_KERNEL);
                if (!pages[i]) {
-                       np = i + 1;
                        err = -ENOMEM;
                        goto out;
                }
@@ -1401,8 +1400,8 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
        } while (exception.retry);
 
 out:
-       while (--np >= 0)
-               __free_page(pages[np]);
+       while (--i >= 0)
+               __free_page(pages[i]);
        kfree(pages);
 
        return err;
index e1a886b..832fa22 100644 (file)
@@ -6004,9 +6004,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf,
 out_ok:
        ret = res.acl_len;
 out_free:
-       for (i = 0; i < npages; i++)
-               if (pages[i])
-                       __free_page(pages[i]);
+       while (--i >= 0)
+               __free_page(pages[i]);
        if (res.acl_scratch)
                __free_page(res.acl_scratch);
        kfree(pages);
@@ -7181,8 +7180,15 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
                } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
                        goto out_restart;
                break;
-       case -NFS4ERR_BAD_STATEID:
        case -NFS4ERR_OLD_STATEID:
+               if (data->arg.new_lock_owner != 0 &&
+                       nfs4_refresh_open_old_stateid(&data->arg.open_stateid,
+                                       lsp->ls_state))
+                       goto out_restart;
+               if (nfs4_refresh_lock_old_stateid(&data->arg.lock_stateid, lsp))
+                       goto out_restart;
+               fallthrough;
+       case -NFS4ERR_BAD_STATEID:
        case -NFS4ERR_STALE_STATEID:
        case -NFS4ERR_EXPIRED:
                if (data->arg.new_lock_owner != 0) {
index acda8f0..bf378ec 100644 (file)
@@ -345,8 +345,10 @@ void nfs_sysfs_move_sb_to_server(struct nfs_server *server)
        int ret = -ENOMEM;
 
        s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id);
-       if (s)
+       if (s) {
                ret = kobject_rename(&server->kobj, s);
+               kfree(s);
+       }
        if (ret < 0)
                pr_warn("NFS: rename sysfs %s failed (%d)\n",
                                        server->kobj.name, ret);
index 3aefbad..daf305d 100644 (file)
@@ -1354,9 +1354,9 @@ static void revoke_delegation(struct nfs4_delegation *dp)
        trace_nfsd_stid_revoke(&dp->dl_stid);
 
        if (clp->cl_minorversion) {
+               spin_lock(&clp->cl_lock);
                dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
                refcount_inc(&dp->dl_stid.sc_count);
-               spin_lock(&clp->cl_lock);
                list_add(&dp->dl_recall_lru, &clp->cl_revoked);
                spin_unlock(&clp->cl_lock);
        }
index 1b8b1aa..4302ca0 100644 (file)
@@ -1105,6 +1105,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
                        if (!nn->nfsd_serv)
                                return -EBUSY;
                        trace_nfsd_end_grace(netns(file));
+                       nfsd4_end_grace(nn);
                        break;
                default:
                        return -EINVAL;
index a8ce522..35bc793 100644 (file)
@@ -1101,9 +1101,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
 
 int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
 {
+       struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
        struct buffer_head *ibh;
        int err;
 
+       /*
+        * Do not dirty inodes after the log writer has been detached
+        * and its nilfs_root struct has been freed.
+        */
+       if (unlikely(nilfs_purging(nilfs)))
+               return 0;
+
        err = nilfs_load_inode_block(inode, &ibh);
        if (unlikely(err)) {
                nilfs_warn(inode->i_sb,
index c255302..7ec1687 100644 (file)
@@ -725,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
                struct folio *folio = fbatch.folios[i];
 
                folio_lock(folio);
+               if (unlikely(folio->mapping != mapping)) {
+                       /* Exclude folios removed from the address space */
+                       folio_unlock(folio);
+                       continue;
+               }
                head = folio_buffers(folio);
                if (!head) {
                        create_empty_buffers(&folio->page, i_blocksize(inode), 0);
@@ -2845,6 +2850,7 @@ void nilfs_detach_log_writer(struct super_block *sb)
                nilfs_segctor_destroy(nilfs->ns_writer);
                nilfs->ns_writer = NULL;
        }
+       set_nilfs_purging(nilfs);
 
        /* Force to free the list of dirty files */
        spin_lock(&nilfs->ns_inode_lock);
@@ -2857,4 +2863,5 @@ void nilfs_detach_log_writer(struct super_block *sb)
        up_write(&nilfs->ns_segctor_sem);
 
        nilfs_dispose_list(nilfs, &garbage_list, 1);
+       clear_nilfs_purging(nilfs);
 }
index 47c7dfb..cd4ae1b 100644 (file)
@@ -29,6 +29,7 @@ enum {
        THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
        THE_NILFS_GC_RUNNING,   /* gc process is running */
        THE_NILFS_SB_DIRTY,     /* super block is dirty */
+       THE_NILFS_PURGING,      /* disposing dirty files for cleanup */
 };
 
 /**
@@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init)
 THE_NILFS_FNS(DISCONTINUED, discontinued)
 THE_NILFS_FNS(GC_RUNNING, gc_running)
 THE_NILFS_FNS(SB_DIRTY, sb_dirty)
+THE_NILFS_FNS(PURGING, purging)
 
 /*
  * Mount option operations
index 9cb32e1..23fc24d 100644 (file)
@@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
 
 static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
+       struct file *file = iocb->ki_filp;
+       char *buf = file->private_data;
        loff_t *fpos = &iocb->ki_pos;
        size_t phdrs_offset, notes_offset, data_offset;
        size_t page_offline_frozen = 1;
@@ -555,10 +557,21 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
                case KCORE_VMEMMAP:
                case KCORE_TEXT:
                        /*
-                        * We use _copy_to_iter() to bypass usermode hardening
-                        * which would otherwise prevent this operation.
+                        * Sadly we must use a bounce buffer here to be able to
+                        * make use of copy_from_kernel_nofault(), as these
+                        * memory regions might not always be mapped on all
+                        * architectures.
                         */
-                       if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
+                       if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+                               if (iov_iter_zero(tsz, iter) != tsz) {
+                                       ret = -EFAULT;
+                                       goto out;
+                               }
+                       /*
+                        * We know the bounce buffer is safe to copy from, so
+                        * use _copy_to_iter() directly.
+                        */
+                       } else if (_copy_to_iter(buf, tsz, iter) != tsz) {
                                ret = -EFAULT;
                                goto out;
                        }
@@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
        if (ret)
                return ret;
 
+       filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!filp->private_data)
+               return -ENOMEM;
+
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
@@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
        return 0;
 }
 
+static int release_kcore(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);
+       return 0;
+}
+
 static const struct proc_ops kcore_proc_ops = {
        .proc_read_iter = read_kcore_iter,
        .proc_open      = open_kcore,
+       .proc_release   = release_kcore,
        .proc_lseek     = default_llseek,
 };
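The kcore change above replaces a direct _copy_to_iter() of possibly-unmapped regions with a per-open bounce buffer filled by copy_from_kernel_nofault(), zero-filling on failure. As a rough userspace illustration of that shape (all names here are made up; only the fall-back-to-zeroes behaviour mirrors the patch):

	#include <stddef.h>
	#include <string.h>

	/* Hypothetical stand-in for copy_from_kernel_nofault(): 0 on success,
	 * nonzero if the source could not be read safely. */
	static int try_copy(void *dst, const void *src, size_t len)
	{
		memcpy(dst, src, len);      /* the toy version always succeeds */
		return 0;
	}

	/* Fill the bounce buffer; unreadable regions become zeroes so the
	 * read still makes forward progress instead of failing outright. */
	static void fill_bounce(char *bounce, const void *src, size_t len)
	{
		if (try_copy(bounce, src, len))
			memset(bounce, 0, len);
	}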
 
index 507cd4e..fafff1b 100644 (file)
@@ -587,8 +587,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
        bool migration = false;
 
        if (pmd_present(*pmd)) {
-               /* FOLL_DUMP will return -EFAULT on huge zero page */
-               page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+               page = vm_normal_page_pmd(vma, addr, *pmd);
        } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
                swp_entry_t entry = pmd_to_swp_entry(*pmd);
 
@@ -758,12 +757,14 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops smaps_walk_ops = {
        .pmd_entry              = smaps_pte_range,
        .hugetlb_entry          = smaps_hugetlb_range,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 static const struct mm_walk_ops smaps_shmem_walk_ops = {
        .pmd_entry              = smaps_pte_range,
        .hugetlb_entry          = smaps_hugetlb_range,
        .pte_hole               = smaps_pte_hole,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 /*
@@ -1245,6 +1246,7 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
 static const struct mm_walk_ops clear_refs_walk_ops = {
        .pmd_entry              = clear_refs_pte_range,
        .test_walk              = clear_refs_test_walk,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
@@ -1622,6 +1624,7 @@ static const struct mm_walk_ops pagemap_ops = {
        .pmd_entry      = pagemap_pmd_range,
        .pte_hole       = pagemap_pte_hole,
        .hugetlb_entry  = pagemap_hugetlb_range,
+       .walk_lock      = PGWALK_RDLOCK,
 };
 
 /*
@@ -1935,6 +1938,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops show_numa_ops = {
        .hugetlb_entry = gather_hugetlb_stats,
        .pmd_entry = gather_pte_stats,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 /*
index fb4162a..aec6e91 100644 (file)
@@ -153,6 +153,11 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan)
                   in_flight(server),
                   atomic_read(&server->in_send),
                   atomic_read(&server->num_waiters));
+#ifdef CONFIG_NET_NS
+       if (server->net)
+               seq_printf(m, " Net namespace: %u ", server->net->ns.inum);
+#endif /* NET_NS */
+
 }
 
 static inline const char *smb_speed_to_str(size_t bps)
@@ -430,10 +435,15 @@ skip_rdma:
                                server->reconnect_instance,
                                server->srv_count,
                                server->sec_mode, in_flight(server));
+#ifdef CONFIG_NET_NS
+               if (server->net)
+                       seq_printf(m, " Net namespace: %u ", server->net->ns.inum);
+#endif /* NET_NS */
 
                seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d",
                                atomic_read(&server->in_send),
                                atomic_read(&server->num_waiters));
+
                if (server->leaf_fullpath) {
                        seq_printf(m, "\nDFS leaf full path: %s",
                                   server->leaf_fullpath);
index fc5acc9..6bc44f7 100644 (file)
@@ -4681,9 +4681,9 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
 
 io_error:
        kunmap(page);
-       unlock_page(page);
 
 read_complete:
+       unlock_page(page);
        return rc;
 }
 
@@ -4878,9 +4878,11 @@ void cifs_oplock_break(struct work_struct *work)
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
                                                  oplock_break);
        struct inode *inode = d_inode(cfile->dentry);
+       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
-       struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
-       struct TCP_Server_Info *server = tcon->ses->server;
+       struct cifs_tcon *tcon;
+       struct TCP_Server_Info *server;
+       struct tcon_link *tlink;
        int rc = 0;
        bool purge_cache = false, oplock_break_cancelled;
        __u64 persistent_fid, volatile_fid;
@@ -4889,6 +4891,12 @@ void cifs_oplock_break(struct work_struct *work)
        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
                        TASK_UNINTERRUPTIBLE);
 
+       tlink = cifs_sb_tlink(cifs_sb);
+       if (IS_ERR(tlink))
+               goto out;
+       tcon = tlink_tcon(tlink);
+       server = tcon->ses->server;
+
        server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
                                      cfile->oplock_epoch, &purge_cache);
 
@@ -4938,18 +4946,19 @@ oplock_break_ack:
        /*
         * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
         * an acknowledgment to be sent when the file has already been closed.
-        * check for server null, since can race with kill_sb calling tree disconnect.
         */
        spin_lock(&cinode->open_file_lock);
-       if (tcon->ses && tcon->ses->server && !oplock_break_cancelled &&
-                                       !list_empty(&cinode->openFileList)) {
+       /* check list empty since can race with kill_sb calling tree disconnect */
+       if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
                spin_unlock(&cinode->open_file_lock);
-               rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid,
-                                               volatile_fid, net_fid, cinode);
+               rc = server->ops->oplock_response(tcon, persistent_fid,
+                                                 volatile_fid, net_fid, cinode);
                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
        } else
                spin_unlock(&cinode->open_file_lock);
 
+       cifs_put_tlink(tlink);
+out:
        cifs_done_oplock_break(cinode);
 }
 
index 4946a0c..67e16c2 100644 (file)
@@ -231,6 +231,8 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c
                break;
        case Opt_sec_none:
                ctx->nullauth = 1;
+               kfree(ctx->username);
+               ctx->username = NULL;
                break;
        default:
                cifs_errorf(fc, "bad security option: %s\n", value);
@@ -1201,6 +1203,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
        case Opt_user:
                kfree(ctx->username);
                ctx->username = NULL;
+               if (ctx->nullauth)
+                       break;
                if (strlen(param->string) == 0) {
                        /* null user, ie. anonymous authentication */
                        ctx->nullauth = 1;
index 33b7e6c..e881df1 100644 (file)
@@ -380,13 +380,13 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
        }
 
        if (smb2_req_struct_sizes[command] != pdu->StructureSize2) {
-               if (command == SMB2_OPLOCK_BREAK_HE &&
-                   le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
-                   le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
+               if (!(command == SMB2_OPLOCK_BREAK_HE &&
+                   (le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_20 ||
+                   le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_21))) {
                        /* special case for SMB2.1 lease break message */
                        ksmbd_debug(SMB,
-                                   "Illegal request size %d for oplock break\n",
-                                   le16_to_cpu(pdu->StructureSize2));
+                               "Illegal request size %u for command %d\n",
+                               le16_to_cpu(pdu->StructureSize2), command);
                        return 1;
                }
        }
index 9849d74..7cc1b0c 100644 (file)
@@ -2324,9 +2324,16 @@ next:
                        break;
                buf_len -= next;
                eabuf = (struct smb2_ea_info *)((char *)eabuf + next);
-               if (next < (u32)eabuf->EaNameLength + le16_to_cpu(eabuf->EaValueLength))
+               if (buf_len < sizeof(struct smb2_ea_info)) {
+                       rc = -EINVAL;
                        break;
+               }
 
+               if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength +
+                               le16_to_cpu(eabuf->EaValueLength)) {
+                       rc = -EINVAL;
+                       break;
+               }
        } while (next != 0);
 
        kfree(attr_name);
index aca8290..069a019 100644 (file)
@@ -68,9 +68,9 @@ struct shfl_string {
 
        /** UTF-8 or UTF-16 string. Nul terminated. */
        union {
-               u8 utf8[2];
-               u16 utf16[1];
-               u16 ucs2[1]; /* misnomer, use utf16. */
+               u8 legacy_padding[2];
+               DECLARE_FLEX_ARRAY(u8, utf8);
+               DECLARE_FLEX_ARRAY(u16, utf16);
        } string;
 };
 VMMDEV_ASSERT_SIZE(shfl_string, 6);
index 92c9aaa..789cfb7 100644 (file)
@@ -341,77 +341,6 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
        return generic_file_llseek_size(file, offset, whence, isize, isize);
 }
 
-struct zonefs_zone_append_bio {
-       /* The target inode of the BIO */
-       struct inode *inode;
-
-       /* For sync writes, the target append write offset */
-       u64 append_offset;
-
-       /*
-        * This member must come last, bio_alloc_bioset will allocate enough
-        * bytes for entire zonefs_bio but relies on bio being last.
-        */
-       struct bio bio;
-};
-
-static inline struct zonefs_zone_append_bio *
-zonefs_zone_append_bio(struct bio *bio)
-{
-       return container_of(bio, struct zonefs_zone_append_bio, bio);
-}
-
-static void zonefs_file_zone_append_dio_bio_end_io(struct bio *bio)
-{
-       struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
-       struct zonefs_zone *z = zonefs_inode_zone(za_bio->inode);
-       sector_t za_sector;
-
-       if (bio->bi_status != BLK_STS_OK)
-               goto bio_end;
-
-       /*
-        * If the file zone was written underneath the file system, the zone
-        * append operation can still succedd (if the zone is not full) but
-        * the write append location will not be where we expect it to be.
-        * Check that we wrote where we intended to, that is, at z->z_wpoffset.
-        */
-       za_sector = z->z_sector + (za_bio->append_offset >> SECTOR_SHIFT);
-       if (bio->bi_iter.bi_sector != za_sector) {
-               zonefs_warn(za_bio->inode->i_sb,
-                           "Invalid write sector %llu for zone at %llu\n",
-                           bio->bi_iter.bi_sector, z->z_sector);
-               bio->bi_status = BLK_STS_IOERR;
-       }
-
-bio_end:
-       iomap_dio_bio_end_io(bio);
-}
-
-static void zonefs_file_zone_append_dio_submit_io(const struct iomap_iter *iter,
-                                                 struct bio *bio,
-                                                 loff_t file_offset)
-{
-       struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
-       struct inode *inode = iter->inode;
-       struct zonefs_zone *z = zonefs_inode_zone(inode);
-
-       /*
-        * Issue a zone append BIO to process sync dio writes. The append
-        * file offset is saved to check the zone append write location
-        * on completion of the BIO.
-        */
-       za_bio->inode = inode;
-       za_bio->append_offset = file_offset;
-
-       bio->bi_opf &= ~REQ_OP_WRITE;
-       bio->bi_opf |= REQ_OP_ZONE_APPEND;
-       bio->bi_iter.bi_sector = z->z_sector;
-       bio->bi_end_io = zonefs_file_zone_append_dio_bio_end_io;
-
-       submit_bio(bio);
-}
-
 static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
                                        int error, unsigned int flags)
 {
@@ -442,14 +371,6 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
        return 0;
 }
 
-static struct bio_set zonefs_zone_append_bio_set;
-
-static const struct iomap_dio_ops zonefs_zone_append_dio_ops = {
-       .submit_io      = zonefs_file_zone_append_dio_submit_io,
-       .end_io         = zonefs_file_write_dio_end_io,
-       .bio_set        = &zonefs_zone_append_bio_set,
-};
-
 static const struct iomap_dio_ops zonefs_write_dio_ops = {
        .end_io         = zonefs_file_write_dio_end_io,
 };
@@ -533,9 +454,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
        struct zonefs_zone *z = zonefs_inode_zone(inode);
        struct super_block *sb = inode->i_sb;
-       const struct iomap_dio_ops *dio_ops;
-       bool sync = is_sync_kiocb(iocb);
-       bool append = false;
        ssize_t ret, count;
 
        /*
@@ -543,7 +461,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
         * as this can cause write reordering (e.g. the first aio gets EAGAIN
         * on the inode lock but the second goes through but is now unaligned).
         */
-       if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT))
+       if (zonefs_zone_is_seq(z) && !is_sync_kiocb(iocb) &&
+           (iocb->ki_flags & IOCB_NOWAIT))
                return -EOPNOTSUPP;
 
        if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -573,18 +492,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
                        goto inode_unlock;
                }
                mutex_unlock(&zi->i_truncate_mutex);
-               append = sync;
-       }
-
-       if (append) {
-               unsigned int max = bdev_max_zone_append_sectors(sb->s_bdev);
-
-               max = ALIGN_DOWN(max << SECTOR_SHIFT, sb->s_blocksize);
-               iov_iter_truncate(from, max);
-
-               dio_ops = &zonefs_zone_append_dio_ops;
-       } else {
-               dio_ops = &zonefs_write_dio_ops;
        }
 
        /*
@@ -593,7 +500,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
         * the user can make sense of the error.
         */
        ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
-                          dio_ops, 0, NULL, 0);
+                          &zonefs_write_dio_ops, 0, NULL, 0);
        if (ret == -ENOTBLK)
                ret = -EBUSY;
 
@@ -938,15 +845,3 @@ const struct file_operations zonefs_file_operations = {
        .splice_write   = iter_file_splice_write,
        .iopoll         = iocb_bio_iopoll,
 };
-
-int zonefs_file_bioset_init(void)
-{
-       return bioset_init(&zonefs_zone_append_bio_set, BIO_POOL_SIZE,
-                          offsetof(struct zonefs_zone_append_bio, bio),
-                          BIOSET_NEED_BVECS);
-}
-
-void zonefs_file_bioset_exit(void)
-{
-       bioset_exit(&zonefs_zone_append_bio_set);
-}
index bbe44a2..9350221 100644 (file)
@@ -1412,13 +1412,9 @@ static int __init zonefs_init(void)
 
        BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);
 
-       ret = zonefs_file_bioset_init();
-       if (ret)
-               return ret;
-
        ret = zonefs_init_inodecache();
        if (ret)
-               goto destroy_bioset;
+               return ret;
 
        ret = zonefs_sysfs_init();
        if (ret)
@@ -1434,8 +1430,6 @@ sysfs_exit:
        zonefs_sysfs_exit();
 destroy_inodecache:
        zonefs_destroy_inodecache();
-destroy_bioset:
-       zonefs_file_bioset_exit();
 
        return ret;
 }
@@ -1445,7 +1439,6 @@ static void __exit zonefs_exit(void)
        unregister_filesystem(&zonefs_type);
        zonefs_sysfs_exit();
        zonefs_destroy_inodecache();
-       zonefs_file_bioset_exit();
 }
 
 MODULE_AUTHOR("Damien Le Moal");
index f663b8e..8175652 100644 (file)
@@ -279,8 +279,6 @@ extern const struct file_operations zonefs_dir_operations;
 extern const struct address_space_operations zonefs_file_aops;
 extern const struct file_operations zonefs_file_operations;
 int zonefs_file_truncate(struct inode *inode, loff_t isize);
-int zonefs_file_bioset_init(void);
-void zonefs_file_bioset_exit(void);
 
 /* In sysfs.c */
 int zonefs_sysfs_register(struct super_block *sb);
index 02f2ac4..e69cece 100644 (file)
@@ -1537,7 +1537,7 @@ enum drm_dp_phy {
 
 #define DP_BRANCH_OUI_HEADER_SIZE      0xc
 #define DP_RECEIVER_CAP_SIZE           0xf
-#define DP_DSC_RECEIVER_CAP_SIZE        0xf
+#define DP_DSC_RECEIVER_CAP_SIZE        0x10 /* DSC Capabilities 0x60 through 0x6F */
 #define EDP_PSR_RECEIVER_CAP_SIZE      2
 #define EDP_DISPLAY_CTL_CAP_SIZE       3
 #define DP_LTTPR_COMMON_CAP_SIZE       8
index 169755d..48e93f9 100644 (file)
@@ -61,15 +61,9 @@ struct std_timing {
        u8 vfreq_aspect;
 } __attribute__((packed));
 
-#define DRM_EDID_PT_SYNC_MASK              (3 << 3)
-# define DRM_EDID_PT_ANALOG_CSYNC          (0 << 3)
-# define DRM_EDID_PT_BIPOLAR_ANALOG_CSYNC  (1 << 3)
-# define DRM_EDID_PT_DIGITAL_CSYNC         (2 << 3)
-#  define DRM_EDID_PT_CSYNC_ON_RGB         (1 << 1) /* analog csync only */
-#  define DRM_EDID_PT_CSYNC_SERRATE        (1 << 2)
-# define DRM_EDID_PT_DIGITAL_SEPARATE_SYNC (3 << 3)
-#  define DRM_EDID_PT_HSYNC_POSITIVE       (1 << 1) /* also digital csync */
-#  define DRM_EDID_PT_VSYNC_POSITIVE       (1 << 2)
+#define DRM_EDID_PT_HSYNC_POSITIVE (1 << 1)
+#define DRM_EDID_PT_VSYNC_POSITIVE (1 << 2)
+#define DRM_EDID_PT_SEPARATE_SYNC  (3 << 3)
 #define DRM_EDID_PT_STEREO         (1 << 5)
 #define DRM_EDID_PT_INTERLACED     (1 << 7)
 
index 4977e0a..fad3c40 100644 (file)
@@ -25,6 +25,7 @@ void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector);
 
 void drm_kms_helper_poll_disable(struct drm_device *dev);
 void drm_kms_helper_poll_enable(struct drm_device *dev);
+void drm_kms_helper_poll_reschedule(struct drm_device *dev);
 bool drm_kms_helper_is_poll_worker(void);
 
 enum drm_mode_status drm_crtc_helper_mode_valid_fixed(struct drm_crtc *crtc,
index 847da6f..31029f4 100644 (file)
@@ -12,7 +12,7 @@
 
 #define ARMV8_PMU_CYCLE_IDX            (ARMV8_PMU_MAX_COUNTERS - 1)
 
-#ifdef CONFIG_HW_PERF_EVENTS
+#if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM)
 
 struct kvm_pmc {
        u8 idx; /* index into the pmu->pmc array */
@@ -74,6 +74,7 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
 struct kvm_pmu_events *kvm_get_pmu_events(void);
 void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
 void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
+void kvm_vcpu_pmu_resync_el0(void);
 
 #define kvm_vcpu_has_pmu(vcpu)                                 \
        (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
@@ -171,6 +172,7 @@ static inline u8 kvm_arm_pmu_get_pmuver_limit(void)
 {
        return 0;
 }
+static inline void kvm_vcpu_pmu_resync_el0(void) {}
 
 #endif
 
index c4f5b52..11984ed 100644 (file)
@@ -791,7 +791,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
 static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
 {
        bio->bi_opf |= REQ_POLLED;
-       if (!is_sync_kiocb(kiocb))
+       if (kiocb->ki_flags & IOCB_NOWAIT)
                bio->bi_opf |= REQ_NOWAIT;
 }
 
index ed44a99..87d94be 100644 (file)
@@ -969,7 +969,6 @@ struct blk_plug {
 
        bool multiple_queues;
        bool has_elevator;
-       bool nowait;
 
        struct list_head cb_list; /* md requires an unplug callback */
 };
index 1ef0133..06f1b29 100644 (file)
@@ -183,6 +183,39 @@ int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale);
  */
 bool clk_is_match(const struct clk *p, const struct clk *q);
 
+/**
+ * clk_rate_exclusive_get - get exclusivity over the rate control of a
+ *                          producer
+ * @clk: clock source
+ *
+ * This function allows drivers to get exclusive control over the rate of a
+ * provider. It prevents any other consumer from executing, even indirectly,
+ * an operation which could alter the rate of the provider or cause glitches.
+ *
+ * If exclusivity is claimed more than once on a clock, even by the same
+ * driver, the rate effectively gets locked, as exclusivity can't be preempted.
+ *
+ * Must not be called from within atomic context.
+ *
+ * Returns success (0) or negative errno.
+ */
+int clk_rate_exclusive_get(struct clk *clk);
+
+/**
+ * clk_rate_exclusive_put - release exclusivity over the rate control of a
+ *                          producer
+ * @clk: clock source
+ *
+ * This function allows drivers to release the exclusivity they previously
+ * got from clk_rate_exclusive_get().
+ *
+ * The caller must balance the number of clk_rate_exclusive_get() and
+ * clk_rate_exclusive_put() calls.
+ *
+ * Must not be called from within atomic context.
+ */
+void clk_rate_exclusive_put(struct clk *clk);
+
 #else
 
 static inline int clk_notifier_register(struct clk *clk,
@@ -236,6 +269,13 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q)
        return p == q;
 }
 
+static inline int clk_rate_exclusive_get(struct clk *clk)
+{
+       return 0;
+}
+
+static inline void clk_rate_exclusive_put(struct clk *clk) {}
+
 #endif
 
 #ifdef CONFIG_HAVE_CLK_PREPARE
@@ -583,38 +623,6 @@ struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id);
  */
 struct clk *devm_get_clk_from_child(struct device *dev,
                                    struct device_node *np, const char *con_id);
-/**
- * clk_rate_exclusive_get - get exclusivity over the rate control of a
- *                          producer
- * @clk: clock source
- *
- * This function allows drivers to get exclusive control over the rate of a
- * provider. It prevents any other consumer to execute, even indirectly,
- * opereation which could alter the rate of the provider or cause glitches
- *
- * If exlusivity is claimed more than once on clock, even by the same driver,
- * the rate effectively gets locked as exclusivity can't be preempted.
- *
- * Must not be called from within atomic context.
- *
- * Returns success (0) or negative errno.
- */
-int clk_rate_exclusive_get(struct clk *clk);
-
-/**
- * clk_rate_exclusive_put - release exclusivity over the rate control of a
- *                          producer
- * @clk: clock source
- *
- * This function allows drivers to release the exclusivity it previously got
- * from clk_rate_exclusive_get()
- *
- * The caller must balance the number of clk_rate_exclusive_get() and
- * clk_rate_exclusive_put() calls.
- *
- * Must not be called from within atomic context.
- */
-void clk_rate_exclusive_put(struct clk *clk);
 
 /**
  * clk_enable - inform the system when the clock source should be running.
@@ -974,14 +982,6 @@ static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {}
 
 static inline void devm_clk_put(struct device *dev, struct clk *clk) {}
 
-
-static inline int clk_rate_exclusive_get(struct clk *clk)
-{
-       return 0;
-}
-
-static inline void clk_rate_exclusive_put(struct clk *clk) {}
-
 static inline int clk_enable(struct clk *clk)
 {
        return 0;
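Since the kernel-doc moved above insists that the get/put calls be balanced and never made from atomic context, a hedged usage sketch may help; the clock handle and the 100 MHz value are placeholders, and only clk_rate_exclusive_get()/clk_rate_exclusive_put() and clk_set_rate() are taken from the clk consumer API.

	#include <linux/clk.h>

	/* Sketch: pin a producer's rate while programming a rate-sensitive block. */
	static int program_fixed_rate(struct clk *clk)
	{
		int ret;

		ret = clk_rate_exclusive_get(clk);      /* may sleep, never in atomic ctx */
		if (ret)
			return ret;

		ret = clk_set_rate(clk, 100000000);     /* rate cannot change behind our back */

		clk_rate_exclusive_put(clk);            /* balance the get */
		return ret;
	}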
index 6e6e57e..e006c71 100644 (file)
@@ -70,6 +70,10 @@ extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
                                        char *buf);
 extern ssize_t cpu_show_retbleed(struct device *dev,
                                 struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev,
+                                            struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_gds(struct device *dev,
+                           struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
index 2028438..e718dbe 100644 (file)
@@ -25,9 +25,6 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 #endif
 
 vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
-                                  unsigned long addr, pmd_t *pmd,
-                                  unsigned int flags);
 bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                           pmd_t *pmd, unsigned long addr, unsigned long next);
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
index 9d3ac77..1b583f3 100644 (file)
@@ -190,8 +190,6 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
                                      struct kvm_vcpu *except);
-bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
-                               unsigned long *vcpu_bitmap);
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID            0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID       1
@@ -256,11 +254,15 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+union kvm_mmu_notifier_arg {
+       pte_t pte;
+};
+
 struct kvm_gfn_range {
        struct kvm_memory_slot *slot;
        gfn_t start;
        gfn_t end;
-       pte_t pte;
+       union kvm_mmu_notifier_arg arg;
        bool may_block;
 };
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
@@ -1359,6 +1361,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+                                  const struct kvm_memory_slot *memslot);
 
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
@@ -1387,10 +1392,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                                        unsigned long mask);
 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
 
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot);
-#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
+#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
 int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
                      int *is_dirty, struct kvm_memory_slot **memslot);
@@ -1479,11 +1481,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        return -ENOTSUPP;
 }
+#else
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
+#endif
+
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+                                                   gfn_t gfn, u64 nr_pages)
+{
+       return -EOPNOTSUPP;
+}
+#else
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
 #endif
 
 #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
@@ -2148,8 +2162,6 @@ struct kvm_device_ops {
        int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
 };
 
-void kvm_device_get(struct kvm_device *dev);
-void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type);
 void kvm_unregister_device_ops(u32 type);
index 406ab9e..34f9dba 100644 (file)
@@ -3421,15 +3421,24 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
  * Indicates whether GUP can follow a PROT_NONE mapped page, or whether
  * a (NUMA hinting) fault is required.
  */
-static inline bool gup_can_follow_protnone(unsigned int flags)
+static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
+                                          unsigned int flags)
 {
        /*
-        * FOLL_FORCE has to be able to make progress even if the VMA is
-        * inaccessible. Further, FOLL_FORCE access usually does not represent
-        * application behaviour and we should avoid triggering NUMA hinting
-        * faults.
+        * If callers don't want to honor NUMA hinting faults, no need to
+        * determine if we would actually have to trigger a NUMA hinting fault.
         */
-       return flags & FOLL_FORCE;
+       if (!(flags & FOLL_HONOR_NUMA_FAULT))
+               return true;
+
+       /*
+        * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.
+        *
+        * Requiring a fault here even for inaccessible VMAs would mean that
+        * FOLL_FORCE cannot make any progress, because handle_mm_fault()
+        * refuses to process NUMA hinting faults in inaccessible VMAs.
+        */
+       return !vma_is_accessible(vma);
 }
 
 typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
index 5e74ce4..7d30dc4 100644 (file)
@@ -1286,6 +1286,15 @@ enum {
        FOLL_PCI_P2PDMA = 1 << 10,
        /* allow interrupts from generic signals */
        FOLL_INTERRUPTIBLE = 1 << 11,
+       /*
+        * Always honor (trigger) NUMA hinting faults.
+        *
+        * FOLL_WRITE implicitly honors NUMA hinting faults because a
+        * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE
+        * apply). get_user_pages_fast_only() always implicitly honors NUMA
+        * hinting faults.
+        */
+       FOLL_HONOR_NUMA_FAULT = 1 << 12,
 
        /* See also internal only FOLL flags in mm/internal.h */
 };
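
Taken together, the two mm.h hunks above make NUMA hinting opt-in for GUP: a PROT_NONE entry may be followed without a hinting fault either when the caller did not ask for FOLL_HONOR_NUMA_FAULT, or when the whole VMA is inaccessible (where handle_mm_fault() would refuse the hint anyway). A minimal standalone sketch of that decision; struct mock_vma and can_follow_protnone() are hypothetical stand-ins for the real types:

/* Illustrative only -- not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define FOLL_HONOR_NUMA_FAULT (1u << 12)	/* mirrors the new flag above */

struct mock_vma { bool accessible; };		/* stand-in for vm_area_struct */

static bool vma_is_accessible(const struct mock_vma *vma)
{
	return vma->accessible;
}

/* True means GUP may follow the PROT_NONE-mapped page without a NUMA fault. */
static bool can_follow_protnone(const struct mock_vma *vma, unsigned int flags)
{
	if (!(flags & FOLL_HONOR_NUMA_FAULT))
		return true;
	return !vma_is_accessible(vma);
}

int main(void)
{
	struct mock_vma numa_hinted = { .accessible = true };
	struct mock_vma prot_none_vma = { .accessible = false };

	printf("honor, accessible VMA:   %d\n",
	       can_follow_protnone(&numa_hinted, FOLL_HONOR_NUMA_FAULT));
	printf("honor, inaccessible VMA: %d\n",
	       can_follow_protnone(&prot_none_vma, FOLL_HONOR_NUMA_FAULT));
	printf("not honoring:            %d\n",
	       can_follow_protnone(&numa_hinted, 0));
	return 0;
}
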
index 27a6df4..27cd1e5 100644 (file)
@@ -6,6 +6,16 @@
 
 struct mm_walk;
 
+/* Locking requirement during a page walk. */
+enum page_walk_lock {
+       /* mmap_lock should be locked for read to stabilize the vma tree */
+       PGWALK_RDLOCK = 0,
+       /* vma will be write-locked during the walk */
+       PGWALK_WRLOCK = 1,
+       /* vma is expected to be already write-locked during the walk */
+       PGWALK_WRLOCK_VERIFY = 2,
+};
+
 /**
  * struct mm_walk_ops - callbacks for walk_page_range
  * @pgd_entry:         if set, called for each non-empty PGD (top-level) entry
@@ -66,6 +76,7 @@ struct mm_walk_ops {
        int (*pre_vma)(unsigned long start, unsigned long end,
                       struct mm_walk *walk);
        void (*post_vma)(struct mm_walk *walk);
+       enum page_walk_lock walk_lock;
 };
 
 /*
index b83a3f9..b068e2e 100644 (file)
@@ -25,11 +25,10 @@ struct page;
        prefetch() should be defined by the architecture, if not, the 
        #define below provides a no-op define.  
        
-       There are 3 prefetch() macros:
+       There are 2 prefetch() macros:
        
        prefetch(x)     - prefetches the cacheline at "x" for read
        prefetchw(x)    - prefetches the cacheline at "x" for write
-       spin_lock_prefetch(x) - prefetches the spinlock *x for taking
        
 	there is also PREFETCH_STRIDE which is the architecture-preferred 
        "lookahead" size for prefetching streamed operations.
@@ -44,10 +43,6 @@ struct page;
 #define prefetchw(x) __builtin_prefetch(x,1)
 #endif
 
-#ifndef ARCH_HAS_SPINLOCK_PREFETCH
-#define spin_lock_prefetch(x) prefetchw(x)
-#endif
-
 #ifndef PREFETCH_STRIDE
 #define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
 #endif
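
With spin_lock_prefetch() gone, the remaining generic macros map directly onto the compiler builtin shown above (prefetch(x) for reads, prefetchw(x) for writes). A small userspace sketch of the usual look-ahead pattern; the stride of 16 is an arbitrary example, not PREFETCH_STRIDE:

/* Illustrative only -- not kernel code. */
#include <stddef.h>
#include <stdio.h>

#define prefetch(x)  __builtin_prefetch(x)	/* read prefetch, as above */
#define prefetchw(x) __builtin_prefetch(x, 1)	/* write prefetch, as above */

static long sum_with_lookahead(const long *a, size_t n, size_t stride)
{
	long sum = 0;

	for (size_t i = 0; i < n; i++) {
		if (i + stride < n)
			prefetch(&a[i + stride]); /* hint the line we'll need soon */
		sum += a[i];
	}
	return sum;
}

int main(void)
{
	long a[1024];

	for (size_t i = 0; i < 1024; i++)
		a[i] = (long)i;
	printf("%ld\n", sum_with_lookahead(a, 1024, 16));
	return 0;
}
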
index 6a9b177..e50416b 100644 (file)
@@ -77,7 +77,3 @@ DEFINE_RAID_ATTRIBUTE(enum raid_state, state)
        
 struct raid_template *raid_class_attach(struct raid_function_template *);
 void raid_class_release(struct raid_template *);
-
-int __must_check raid_component_add(struct raid_template *, struct device *,
-                                   struct device *);
-
index 6d58c57..a156d2e 100644 (file)
@@ -459,7 +459,8 @@ struct uart_port {
                                                struct serial_rs485 *rs485);
        int                     (*iso7816_config)(struct uart_port *,
                                                  struct serial_iso7816 *iso7816);
-       int                     ctrl_id;                /* optional serial core controller id */
+       unsigned int            ctrl_id;                /* optional serial core controller id */
+       unsigned int            port_id;                /* optional serial core port id */
        unsigned int            irq;                    /* irq number */
        unsigned long           irqflags;               /* irq flags  */
        unsigned int            uartclk;                /* base uart clock */
index 054d791..c163751 100644 (file)
@@ -62,6 +62,7 @@ struct sk_psock_progs {
 
 enum sk_psock_state_bits {
        SK_PSOCK_TX_ENABLED,
+       SK_PSOCK_RX_STRP_ENABLED,
 };
 
 struct sk_psock_link {
index 6a1e8f1..4ee9d13 100644 (file)
@@ -283,6 +283,7 @@ enum tpm_chip_flags {
        TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED    = BIT(6),
        TPM_CHIP_FLAG_FIRMWARE_UPGRADE          = BIT(7),
        TPM_CHIP_FLAG_SUSPENDED                 = BIT(8),
+       TPM_CHIP_FLAG_HWRNG_DISABLED            = BIT(9),
 };
 
 #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
index 3930e67..1e8bbdb 100644 (file)
@@ -59,6 +59,17 @@ int trace_raw_output_prep(struct trace_iterator *iter,
 extern __printf(2, 3)
 void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);
 
+/* Used to find the offset and length of dynamic fields in trace events */
+struct trace_dynamic_info {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       u16     offset;
+       u16     len;
+#else
+       u16     len;
+       u16     offset;
+#endif
+};
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
index bdf8de2..7b4dd69 100644 (file)
@@ -155,6 +155,10 @@ retry:
                if (gso_type & SKB_GSO_UDP)
                        nh_off -= thlen;
 
+               /* The kernel has special handling for GSO_BY_FRAGS. */
+               if (gso_size == GSO_BY_FRAGS)
+                       return -EINVAL;
+
                /* Too small packets are not really GSO ones. */
                if (skb->len - nh_off > gso_size) {
                        shinfo->gso_size = gso_size;
index 30ac427..5b8b1b6 100644 (file)
@@ -722,23 +722,14 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
 }
 
 /* Caller must hold rcu_read_lock() for read */
-static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
+static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac)
 {
        struct list_head *iter;
        struct slave *tmp;
-       struct netdev_hw_addr *ha;
 
        bond_for_each_slave_rcu(bond, tmp, iter)
                if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
                        return true;
-
-       if (netdev_uc_empty(bond->dev))
-               return false;
-
-       netdev_for_each_uc_addr(ha, bond->dev)
-               if (ether_addr_equal_64bits(mac, ha->addr))
-                       return true;
-
        return false;
 }
 
index 7c7d03a..d6fa7c8 100644 (file)
@@ -562,6 +562,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
        if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
                return NULL;
 
+       if (iftype == NL80211_IFTYPE_AP_VLAN)
+               iftype = NL80211_IFTYPE_AP;
+
        for (i = 0; i < sband->n_iftype_data; i++)  {
                const struct ieee80211_sband_iftype_data *data =
                        &sband->iftype_data[i];
index 0bb32bf..491ceb7 100644 (file)
@@ -222,8 +222,8 @@ struct inet_sock {
        __s16                   uc_ttl;
        __u16                   cmsg_flags;
        struct ip_options_rcu __rcu     *inet_opt;
+       atomic_t                inet_id;
        __be16                  inet_sport;
-       __u16                   inet_id;
 
        __u8                    tos;
        __u8                    min_ttl;
index 3325211..19adacd 100644 (file)
@@ -538,8 +538,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
         * generator as much as we can.
         */
        if (sk && inet_sk(sk)->inet_daddr) {
-               iph->id = htons(inet_sk(sk)->inet_id);
-               inet_sk(sk)->inet_id += segs;
+               int val;
+
+               /* avoid atomic operations for TCP,
+                * as we hold socket lock at this point.
+                */
+               if (sk_is_tcp(sk)) {
+                       sock_owned_by_me(sk);
+                       val = atomic_read(&inet_sk(sk)->inet_id);
+                       atomic_set(&inet_sk(sk)->inet_id, val + segs);
+               } else {
+                       val = atomic_add_return(segs, &inet_sk(sk)->inet_id);
+               }
+               iph->id = htons(val);
                return;
        }
        if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
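
The hunk above pairs with the earlier inet_sock change: inet_id becomes an atomic_t so non-TCP senders can reserve IDs with atomic_add_return(), while TCP, which already holds the socket lock, keeps a plain read/modify/write. A standalone sketch of the two update paths using C11 atomics; the function names are illustrative, not kernel API:

/* Illustrative only -- not kernel code. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint inet_id = 1000;	/* stand-in for inet_sk(sk)->inet_id */

/* Non-TCP path: lockless, mirrors atomic_add_return(segs, ...) (new value). */
static uint16_t ip_id_lockless(unsigned int segs)
{
	return (uint16_t)(atomic_fetch_add(&inet_id, segs) + segs);
}

/* TCP path: the socket lock is held, so a plain read + write back suffices
 * (returns the old value, as in the hunk above). */
static uint16_t ip_id_locked(unsigned int segs)
{
	unsigned int val = atomic_load_explicit(&inet_id, memory_order_relaxed);

	atomic_store_explicit(&inet_id, val + segs, memory_order_relaxed);
	return (uint16_t)val;
}

int main(void)
{
	printf("lockless (e.g. UDP) id: %u\n", ip_id_lockless(3));
	printf("locked   (e.g. TCP) id: %u\n", ip_id_locked(2));
	return 0;
}
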
index 3a8a2d2..2a55ae9 100644 (file)
@@ -6612,6 +6612,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap,
  * marks frames marked in the bitmap as having been filtered. Afterwards, it
  * checks if any frames in the window starting from @ssn can now be released
  * (in case they were only waiting for frames that were filtered.)
+ * (Only works correctly if @max_rx_aggregation_subframes <= 64 frames)
  */
 void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
                                          u16 ssn, u64 filtered,
index 640441a..dd40c75 100644 (file)
@@ -512,6 +512,7 @@ struct nft_set_elem_expr {
  *
  *     @list: table set list node
  *     @bindings: list of set bindings
+ *     @refs: internal refcounting for async set destruction
  *     @table: table this set belongs to
  *     @net: netnamespace this set belongs to
  *     @name: name of the set
@@ -533,6 +534,7 @@ struct nft_set_elem_expr {
  *     @expr: stateful expression
  *     @ops: set ops
  *     @flags: set flags
+ *     @dead: set will be freed, never cleared
  *     @genmask: generation mask
  *     @klen: key length
  *     @dlen: data length
@@ -541,6 +543,7 @@ struct nft_set_elem_expr {
 struct nft_set {
        struct list_head                list;
        struct list_head                bindings;
+       refcount_t                      refs;
        struct nft_table                *table;
        possible_net_t                  net;
        char                            *name;
@@ -562,7 +565,8 @@ struct nft_set {
        struct list_head                pending_update;
        /* runtime data below here */
        const struct nft_set_ops        *ops ____cacheline_aligned;
-       u16                             flags:14,
+       u16                             flags:13,
+                                       dead:1,
                                        genmask:2;
        u8                              klen;
        u8                              dlen;
@@ -583,6 +587,11 @@ static inline void *nft_set_priv(const struct nft_set *set)
        return (void *)set->data;
 }
 
+static inline bool nft_set_gc_is_pending(const struct nft_set *s)
+{
+       return refcount_read(&s->refs) != 1;
+}
+
 static inline struct nft_set *nft_set_container_of(const void *priv)
 {
        return (void *)priv - offsetof(struct nft_set, data);
@@ -596,7 +605,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
 
 struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
                                            const struct nft_set *set);
-void *nft_set_catchall_gc(const struct nft_set *set);
 
 static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
 {
@@ -813,62 +821,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
                                const struct nft_set *set, void *elem);
 
-/**
- *     struct nft_set_gc_batch_head - nf_tables set garbage collection batch
- *
- *     @rcu: rcu head
- *     @set: set the elements belong to
- *     @cnt: count of elements
- */
-struct nft_set_gc_batch_head {
-       struct rcu_head                 rcu;
-       const struct nft_set            *set;
-       unsigned int                    cnt;
-};
-
-#define NFT_SET_GC_BATCH_SIZE  ((PAGE_SIZE -                             \
-                                 sizeof(struct nft_set_gc_batch_head)) / \
-                                sizeof(void *))
-
-/**
- *     struct nft_set_gc_batch - nf_tables set garbage collection batch
- *
- *     @head: GC batch head
- *     @elems: garbage collection elements
- */
-struct nft_set_gc_batch {
-       struct nft_set_gc_batch_head    head;
-       void                            *elems[NFT_SET_GC_BATCH_SIZE];
-};
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
-                                               gfp_t gfp);
-void nft_set_gc_batch_release(struct rcu_head *rcu);
-
-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
-{
-       if (gcb != NULL)
-               call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
-}
-
-static inline struct nft_set_gc_batch *
-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
-                      gfp_t gfp)
-{
-       if (gcb != NULL) {
-               if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
-                       return gcb;
-               nft_set_gc_batch_complete(gcb);
-       }
-       return nft_set_gc_batch_alloc(set, gfp);
-}
-
-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
-                                       void *elem)
-{
-       gcb->elems[gcb->head.cnt++] = elem;
-}
-
 struct nft_expr_ops;
 /**
  *     struct nft_expr_type - nf_tables expression type
@@ -1557,39 +1509,30 @@ static inline void nft_set_elem_change_active(const struct net *net,
 
 #endif /* IS_ENABLED(CONFIG_NF_TABLES) */
 
-/*
- * We use a free bit in the genmask field to indicate the element
- * is busy, meaning it is currently being processed either by
- * the netlink API or GC.
- *
- * Even though the genmask is only a single byte wide, this works
- * because the extension structure if fully constant once initialized,
- * so there are no non-atomic write accesses unless it is already
- * marked busy.
- */
-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
 
 #if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT  2
+#define NFT_SET_ELEM_DEAD_BIT  2
 #elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT  (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#define NFT_SET_ELEM_DEAD_BIT  (BITS_PER_LONG - BITS_PER_BYTE + 2)
 #else
 #error
 #endif
 
-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+static inline void nft_set_elem_dead(struct nft_set_ext *ext)
 {
        unsigned long *word = (unsigned long *)ext;
 
        BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
-       return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+       set_bit(NFT_SET_ELEM_DEAD_BIT, word);
 }
 
-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
 {
        unsigned long *word = (unsigned long *)ext;
 
-       clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+       BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+       return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
 }
 
 /**
@@ -1732,6 +1675,38 @@ struct nft_trans_flowtable {
 #define nft_trans_flowtable_flags(trans)       \
        (((struct nft_trans_flowtable *)trans->data)->flags)
 
+#define NFT_TRANS_GC_BATCHCOUNT        256
+
+struct nft_trans_gc {
+       struct list_head        list;
+       struct net              *net;
+       struct nft_set          *set;
+       u32                     seq;
+       u8                      count;
+       void                    *priv[NFT_TRANS_GC_BATCHCOUNT];
+       struct rcu_head         rcu;
+};
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+                                       unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_destroy(struct nft_trans_gc *trans);
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+                                             unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+                                          unsigned int gc_seq);
+
+void nft_setelem_data_deactivate(const struct net *net,
+                                const struct nft_set *set,
+                                struct nft_set_elem *elem);
+
 int __init nft_chain_filter_init(void);
 void nft_chain_filter_fini(void);
 
@@ -1758,6 +1733,8 @@ struct nftables_pernet {
        struct mutex            commit_mutex;
        u64                     table_handle;
        unsigned int            base_seq;
+       unsigned int            gc_seq;
+       u8                      validate_state;
 };
 
 extern unsigned int nf_tables_net_id;
index d9076a7..6506221 100644 (file)
@@ -190,8 +190,8 @@ int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *
 int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
                        u32 portid, const struct nlmsghdr *nlh);
 
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
-                       struct netlink_ext_ack *exterr);
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+                            struct netlink_ext_ack *exterr);
 struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid);
 
 #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
index 2eb916d..690e221 100644 (file)
@@ -1323,6 +1323,7 @@ struct proto {
        /*
         * Pressure flag: try to collapse.
         * Technical note: it is used by multiple contexts non atomically.
+        * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes.
         * All the __sk_mem_schedule() is of this nature: accounting
         * is strict, actions are advisory and have some latency.
         */
@@ -1420,6 +1421,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
        return sk->sk_prot->memory_pressure != NULL;
 }
 
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+       return sk->sk_prot->memory_pressure &&
+               !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
 static inline bool sk_under_memory_pressure(const struct sock *sk)
 {
        if (!sk->sk_prot->memory_pressure)
@@ -1429,7 +1436,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
            mem_cgroup_under_socket_pressure(sk->sk_memcg))
                return true;
 
-       return !!*sk->sk_prot->memory_pressure;
+       return !!READ_ONCE(*sk->sk_prot->memory_pressure);
 }
 
 static inline long
@@ -1506,7 +1513,7 @@ proto_memory_pressure(struct proto *prot)
 {
        if (!prot->memory_pressure)
                return false;
-       return !!*prot->memory_pressure;
+       return !!READ_ONCE(*prot->memory_pressure);
 }
 
 
index 151ca95..363c7d5 100644 (file)
@@ -1984,6 +1984,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
                if (dev->xfrmdev_ops->xdo_dev_state_free)
                        dev->xfrmdev_ops->xdo_dev_state_free(x);
                xso->dev = NULL;
+               xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
                netdev_put(dev, &xso->dev_tracker);
        }
 }
index bf06db8..7b1ddff 100644 (file)
@@ -381,6 +381,7 @@ TRACE_EVENT(tcp_cong_state_set,
                __field(const void *, skaddr)
                __field(__u16, sport)
                __field(__u16, dport)
+               __field(__u16, family)
                __array(__u8, saddr, 4)
                __array(__u8, daddr, 4)
                __array(__u8, saddr_v6, 16)
@@ -396,6 +397,7 @@ TRACE_EVENT(tcp_cong_state_set,
 
                __entry->sport = ntohs(inet->inet_sport);
                __entry->dport = ntohs(inet->inet_dport);
+               __entry->family = sk->sk_family;
 
                p32 = (__be32 *) __entry->saddr;
                *p32 = inet->inet_saddr;
@@ -409,7 +411,8 @@ TRACE_EVENT(tcp_cong_state_set,
                __entry->cong_state = ca_state;
        ),
 
-       TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+       TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+                 show_family_name(__entry->family),
                  __entry->sport, __entry->dport,
                  __entry->saddr, __entry->daddr,
                  __entry->saddr_v6, __entry->daddr_v6,
index 0c8cf35..e0e1591 100644 (file)
@@ -443,7 +443,6 @@ typedef struct elf64_shdr {
 #define NT_MIPS_DSP    0x800           /* MIPS DSP ASE registers */
 #define NT_MIPS_FP_MODE        0x801           /* MIPS floating-point mode */
 #define NT_MIPS_MSA    0x802           /* MIPS SIMD registers */
-#define NT_RISCV_VECTOR        0x900           /* RISC-V vector registers */
 #define NT_LOONGARCH_CPUCFG    0xa00   /* LoongArch CPU config registers */
 #define NT_LOONGARCH_CSR       0xa01   /* LoongArch control and status registers */
 #define NT_LOONGARCH_LSX       0xa02   /* LoongArch Loongson SIMD Extension registers */
index b958c2e..418eef6 100644 (file)
@@ -38,18 +38,6 @@ struct kyrofb_info {
        int wc_cookie;
 };
 
-extern int kyro_dev_init(void);
-extern void kyro_dev_reset(void);
-
-extern unsigned char *kyro_dev_physical_fb_ptr(void);
-extern unsigned char *kyro_dev_virtual_fb_ptr(void);
-extern void *kyro_dev_physical_regs_ptr(void);
-extern void *kyro_dev_virtual_regs_ptr(void);
-extern unsigned int kyro_dev_fb_size(void);
-extern unsigned int kyro_dev_regs_size(void);
-
-extern u32 kyro_dev_overlay_offset(void);
-
 /*
  * benedict.gaster@superh.com
  * Added the following IOCTLs for the creation of overlay services...
index f4591b9..93db3e4 100644 (file)
@@ -3470,6 +3470,8 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
         * - use the kernel virtual address of the shared io_uring context
         *   (instead of the userspace-provided address, which has to be 0UL
         *   anyway).
+        * - use the same pgoff which the get_unmapped_area() uses to
+        *   calculate the page colouring.
         * For architectures without such aliasing requirements, the
         * architecture will return any suitable mapping because addr is 0.
         */
@@ -3478,6 +3480,7 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
        pgoff = 0;      /* has been translated to ptr above */
 #ifdef SHM_COLOUR
        addr = (uintptr_t) ptr;
+       pgoff = addr >> PAGE_SHIFT;
 #else
        addr = 0UL;
 #endif
index 10ca57f..e3fae26 100644 (file)
@@ -35,9 +35,11 @@ static bool io_openat_force_async(struct io_open *open)
 {
        /*
         * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
-        * it'll always -EAGAIN
+        * it'll always -EAGAIN. Note that we test for __O_TMPFILE because
+        * O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force
+        * async for.
         */
-       return open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE);
+       return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE);
 }
 
 static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
index edec335..5f2c668 100644 (file)
@@ -68,11 +68,16 @@ static int irq_sw_resend(struct irq_desc *desc)
                 */
                if (!desc->parent_irq)
                        return -EINVAL;
+
+               desc = irq_to_desc(desc->parent_irq);
+               if (!desc)
+                       return -EINVAL;
        }
 
        /* Add to resend_list and activate the softirq: */
        raw_spin_lock(&irq_resend_lock);
-       hlist_add_head(&desc->resend_node, &irq_resend_list);
+       if (hlist_unhashed(&desc->resend_node))
+               hlist_add_head(&desc->resend_node, &irq_resend_list);
        raw_spin_unlock(&irq_resend_lock);
        tasklet_schedule(&resend_tasklet);
        return 0;
index e1b4bfa..2b4a946 100644 (file)
@@ -1166,7 +1166,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
        int error;
 
        if (!hibernation_available())
-               return 0;
+               return n;
 
        if (len && buf[len-1] == '\n')
                len--;
index b887007..8e64aaa 100644 (file)
@@ -4213,8 +4213,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
         * will point to the same string as current_trace->name.
         */
        mutex_lock(&trace_types_lock);
-       if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
+       if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
+               /* Close iter->trace before switching to the new current tracer */
+               if (iter->trace->close)
+                       iter->trace->close(iter);
                *iter->trace = *tr->current_trace;
+               /* Reopen the new current tracer */
+               if (iter->trace->open)
+                       iter->trace->open(iter);
+       }
        mutex_unlock(&trace_types_lock);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -5277,11 +5284,17 @@ int tracing_set_cpumask(struct trace_array *tr,
                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
                        atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
                        ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
+#ifdef CONFIG_TRACER_MAX_TRACE
+                       ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
+#endif
                }
                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
                        atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
                        ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
+#ifdef CONFIG_TRACER_MAX_TRACE
+                       ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
+#endif
                }
        }
        arch_spin_unlock(&tr->max_lock);
@@ -6705,10 +6718,36 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
 
 #endif
 
+static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
+{
+       if (cpu == RING_BUFFER_ALL_CPUS) {
+               if (cpumask_empty(tr->pipe_cpumask)) {
+                       cpumask_setall(tr->pipe_cpumask);
+                       return 0;
+               }
+       } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
+               cpumask_set_cpu(cpu, tr->pipe_cpumask);
+               return 0;
+       }
+       return -EBUSY;
+}
+
+static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
+{
+       if (cpu == RING_BUFFER_ALL_CPUS) {
+               WARN_ON(!cpumask_full(tr->pipe_cpumask));
+               cpumask_clear(tr->pipe_cpumask);
+       } else {
+               WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
+               cpumask_clear_cpu(cpu, tr->pipe_cpumask);
+       }
+}
+
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
        struct trace_array *tr = inode->i_private;
        struct trace_iterator *iter;
+       int cpu;
        int ret;
 
        ret = tracing_check_open_get_tr(tr);
@@ -6716,13 +6755,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
                return ret;
 
        mutex_lock(&trace_types_lock);
+       cpu = tracing_get_cpu(inode);
+       ret = open_pipe_on_cpu(tr, cpu);
+       if (ret)
+               goto fail_pipe_on_cpu;
 
        /* create a buffer to store the information to pass to userspace */
        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter) {
                ret = -ENOMEM;
-               __trace_array_put(tr);
-               goto out;
+               goto fail_alloc_iter;
        }
 
        trace_seq_init(&iter->seq);
@@ -6745,7 +6787,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
        iter->tr = tr;
        iter->array_buffer = &tr->array_buffer;
-       iter->cpu_file = tracing_get_cpu(inode);
+       iter->cpu_file = cpu;
        mutex_init(&iter->mutex);
        filp->private_data = iter;
 
@@ -6755,12 +6797,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
        nonseekable_open(inode, filp);
 
        tr->trace_ref++;
-out:
+
        mutex_unlock(&trace_types_lock);
        return ret;
 
 fail:
        kfree(iter);
+fail_alloc_iter:
+       close_pipe_on_cpu(tr, cpu);
+fail_pipe_on_cpu:
        __trace_array_put(tr);
        mutex_unlock(&trace_types_lock);
        return ret;
@@ -6777,7 +6822,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 
        if (iter->trace->pipe_close)
                iter->trace->pipe_close(iter);
-
+       close_pipe_on_cpu(tr, iter->cpu_file);
        mutex_unlock(&trace_types_lock);
 
        free_cpumask_var(iter->started);
@@ -9441,6 +9486,9 @@ static struct trace_array *trace_array_create(const char *name)
        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
                goto out_free_tr;
 
+       if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
+               goto out_free_tr;
+
        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
 
        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
@@ -9482,6 +9530,7 @@ static struct trace_array *trace_array_create(const char *name)
  out_free_tr:
        ftrace_free_ftrace_ops(tr);
        free_trace_buffers(tr);
+       free_cpumask_var(tr->pipe_cpumask);
        free_cpumask_var(tr->tracing_cpumask);
        kfree(tr->name);
        kfree(tr);
@@ -9584,6 +9633,7 @@ static int __remove_instance(struct trace_array *tr)
        }
        kfree(tr->topts);
 
+       free_cpumask_var(tr->pipe_cpumask);
        free_cpumask_var(tr->tracing_cpumask);
        kfree(tr->name);
        kfree(tr);
@@ -10381,12 +10431,14 @@ __init static int tracer_alloc_buffers(void)
        if (trace_create_savedcmd() < 0)
                goto out_free_temp_buffer;
 
+       if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
+               goto out_free_savedcmd;
+
        /* TODO: make the number of buffers hot pluggable with CPUS */
        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
                MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
-               goto out_free_savedcmd;
+               goto out_free_pipe_cpumask;
        }
-
        if (global_trace.buffer_disabled)
                tracing_off();
 
@@ -10439,6 +10491,8 @@ __init static int tracer_alloc_buffers(void)
 
        return 0;
 
+out_free_pipe_cpumask:
+       free_cpumask_var(global_trace.pipe_cpumask);
 out_free_savedcmd:
        free_saved_cmdlines_buffer(savedcmd);
 out_free_temp_buffer:
index e1edc21..73eaec1 100644 (file)
@@ -377,6 +377,8 @@ struct trace_array {
        struct list_head        events;
        struct trace_event_file *trace_marker_file;
        cpumask_var_t           tracing_cpumask; /* only trace on set CPUs */
+       /* one per_cpu trace_pipe can be opened by only one user */
+       cpumask_var_t           pipe_cpumask;
        int                     ref;
        int                     trace_ref;
 #ifdef CONFIG_FUNCTION_TRACER
@@ -1295,6 +1297,14 @@ static inline void trace_branch_disable(void)
 /* set ring buffers to default size if not already done so */
 int tracing_update_buffers(void);
 
+union trace_synth_field {
+       u8                              as_u8;
+       u16                             as_u16;
+       u32                             as_u32;
+       u64                             as_u64;
+       struct trace_dynamic_info       as_dynamic;
+};
+
 struct ftrace_event_field {
        struct list_head        link;
        const char              *name;
index dd398af..9897d0b 100644 (file)
@@ -127,7 +127,7 @@ static bool synth_event_match(const char *system, const char *event,
 
 struct synth_trace_event {
        struct trace_entry      ent;
-       u64                     fields[];
+       union trace_synth_field fields[];
 };
 
 static int synth_event_define_fields(struct trace_event_call *call)
@@ -321,19 +321,19 @@ static const char *synth_field_fmt(char *type)
 
 static void print_synth_event_num_val(struct trace_seq *s,
                                      char *print_fmt, char *name,
-                                     int size, u64 val, char *space)
+                                     int size, union trace_synth_field *val, char *space)
 {
        switch (size) {
        case 1:
-               trace_seq_printf(s, print_fmt, name, (u8)val, space);
+               trace_seq_printf(s, print_fmt, name, val->as_u8, space);
                break;
 
        case 2:
-               trace_seq_printf(s, print_fmt, name, (u16)val, space);
+               trace_seq_printf(s, print_fmt, name, val->as_u16, space);
                break;
 
        case 4:
-               trace_seq_printf(s, print_fmt, name, (u32)val, space);
+               trace_seq_printf(s, print_fmt, name, val->as_u32, space);
                break;
 
        default:
@@ -350,7 +350,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
        struct trace_seq *s = &iter->seq;
        struct synth_trace_event *entry;
        struct synth_event *se;
-       unsigned int i, n_u64;
+       unsigned int i, j, n_u64;
        char print_fmt[32];
        const char *fmt;
 
@@ -374,43 +374,28 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
                /* parameter values */
                if (se->fields[i]->is_string) {
                        if (se->fields[i]->is_dynamic) {
-                               u32 offset, data_offset;
-                               char *str_field;
-
-                               offset = (u32)entry->fields[n_u64];
-                               data_offset = offset & 0xffff;
-
-                               str_field = (char *)entry + data_offset;
+                               union trace_synth_field *data = &entry->fields[n_u64];
 
                                trace_seq_printf(s, print_fmt, se->fields[i]->name,
                                                 STR_VAR_LEN_MAX,
-                                                str_field,
+                                                (char *)entry + data->as_dynamic.offset,
                                                 i == se->n_fields - 1 ? "" : " ");
                                n_u64++;
                        } else {
                                trace_seq_printf(s, print_fmt, se->fields[i]->name,
                                                 STR_VAR_LEN_MAX,
-                                                (char *)&entry->fields[n_u64],
+                                                (char *)&entry->fields[n_u64].as_u64,
                                                 i == se->n_fields - 1 ? "" : " ");
                                n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
                        }
                } else if (se->fields[i]->is_stack) {
-                       u32 offset, data_offset, len;
-                       unsigned long *p, *end;
-
-                       offset = (u32)entry->fields[n_u64];
-                       data_offset = offset & 0xffff;
-                       len = offset >> 16;
-
-                       p = (void *)entry + data_offset;
-                       end = (void *)p + len - (sizeof(long) - 1);
+                       union trace_synth_field *data = &entry->fields[n_u64];
+                       unsigned long *p = (void *)entry + data->as_dynamic.offset;
 
                        trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
-
-                       for (; *p && p < end; p++)
-                               trace_seq_printf(s, "=> %pS\n", (void *)*p);
+                       for (j = 1; j < data->as_dynamic.len / sizeof(long); j++)
+                               trace_seq_printf(s, "=> %pS\n", (void *)p[j]);
                        n_u64++;
-
                } else {
                        struct trace_print_flags __flags[] = {
                            __def_gfpflag_names, {-1, NULL} };
@@ -419,13 +404,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
                        print_synth_event_num_val(s, print_fmt,
                                                  se->fields[i]->name,
                                                  se->fields[i]->size,
-                                                 entry->fields[n_u64],
+                                                 &entry->fields[n_u64],
                                                  space);
 
                        if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
                                trace_seq_puts(s, " (");
                                trace_print_flags_seq(s, "|",
-                                                     entry->fields[n_u64],
+                                                     entry->fields[n_u64].as_u64,
                                                      __flags);
                                trace_seq_putc(s, ')');
                        }
@@ -454,21 +439,16 @@ static unsigned int trace_string(struct synth_trace_event *entry,
        int ret;
 
        if (is_dynamic) {
-               u32 data_offset;
-
-               data_offset = struct_size(entry, fields, event->n_u64);
-               data_offset += data_size;
-
-               len = fetch_store_strlen((unsigned long)str_val);
+               union trace_synth_field *data = &entry->fields[*n_u64];
 
-               data_offset |= len << 16;
-               *(u32 *)&entry->fields[*n_u64] = data_offset;
+               data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size;
+               data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val);
 
                ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);
 
                (*n_u64)++;
        } else {
-               str_field = (char *)&entry->fields[*n_u64];
+               str_field = (char *)&entry->fields[*n_u64].as_u64;
 
 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
                if ((unsigned long)str_val < TASK_SIZE)
@@ -492,6 +472,7 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
                                 unsigned int data_size,
                                 unsigned int *n_u64)
 {
+       union trace_synth_field *data = &entry->fields[*n_u64];
        unsigned int len;
        u32 data_offset;
        void *data_loc;
@@ -504,10 +485,6 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
                        break;
        }
 
-       /* Include the zero'd element if it fits */
-       if (len < HIST_STACKTRACE_DEPTH)
-               len++;
-
        len *= sizeof(long);
 
        /* Find the dynamic section to copy the stack into. */
@@ -515,8 +492,9 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
        memcpy(data_loc, stack, len);
 
        /* Fill in the field that holds the offset/len combo */
-       data_offset |= len << 16;
-       *(u32 *)&entry->fields[*n_u64] = data_offset;
+
+       data->as_dynamic.offset = data_offset;
+       data->as_dynamic.len = len;
 
        (*n_u64)++;
 
@@ -550,7 +528,8 @@ static notrace void trace_event_raw_event_synth(void *__data,
                str_val = (char *)(long)var_ref_vals[val_idx];
 
                if (event->dynamic_fields[i]->is_stack) {
-                       len = *((unsigned long *)str_val);
+                       /* reserve one extra element for size */
+                       len = *((unsigned long *)str_val) + 1;
                        len *= sizeof(unsigned long);
                } else {
                        len = fetch_store_strlen((unsigned long)str_val);
@@ -592,19 +571,19 @@ static notrace void trace_event_raw_event_synth(void *__data,
 
                        switch (field->size) {
                        case 1:
-                               *(u8 *)&entry->fields[n_u64] = (u8)val;
+                               entry->fields[n_u64].as_u8 = (u8)val;
                                break;
 
                        case 2:
-                               *(u16 *)&entry->fields[n_u64] = (u16)val;
+                               entry->fields[n_u64].as_u16 = (u16)val;
                                break;
 
                        case 4:
-                               *(u32 *)&entry->fields[n_u64] = (u32)val;
+                               entry->fields[n_u64].as_u32 = (u32)val;
                                break;
 
                        default:
-                               entry->fields[n_u64] = val;
+                               entry->fields[n_u64].as_u64 = val;
                                break;
                        }
                        n_u64++;
@@ -1791,19 +1770,19 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
 
                        switch (field->size) {
                        case 1:
-                               *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+                               state.entry->fields[n_u64].as_u8 = (u8)val;
                                break;
 
                        case 2:
-                               *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+                               state.entry->fields[n_u64].as_u16 = (u16)val;
                                break;
 
                        case 4:
-                               *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+                               state.entry->fields[n_u64].as_u32 = (u32)val;
                                break;
 
                        default:
-                               state.entry->fields[n_u64] = val;
+                               state.entry->fields[n_u64].as_u64 = val;
                                break;
                        }
                        n_u64++;
@@ -1884,19 +1863,19 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
 
                        switch (field->size) {
                        case 1:
-                               *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+                               state.entry->fields[n_u64].as_u8 = (u8)val;
                                break;
 
                        case 2:
-                               *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+                               state.entry->fields[n_u64].as_u16 = (u16)val;
                                break;
 
                        case 4:
-                               *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+                               state.entry->fields[n_u64].as_u32 = (u32)val;
                                break;
 
                        default:
-                               state.entry->fields[n_u64] = val;
+                               state.entry->fields[n_u64].as_u64 = val;
                                break;
                        }
                        n_u64++;
@@ -2031,19 +2010,19 @@ static int __synth_event_add_val(const char *field_name, u64 val,
        } else {
                switch (field->size) {
                case 1:
-                       *(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
+                       trace_state->entry->fields[field->offset].as_u8 = (u8)val;
                        break;
 
                case 2:
-                       *(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
+                       trace_state->entry->fields[field->offset].as_u16 = (u16)val;
                        break;
 
                case 4:
-                       *(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
+                       trace_state->entry->fields[field->offset].as_u32 = (u32)val;
                        break;
 
                default:
-                       trace_state->entry->fields[field->offset] = val;
+                       trace_state->entry->fields[field->offset].as_u64 = val;
                        break;
                }
        }
index 590b3d5..ba37f76 100644 (file)
@@ -231,7 +231,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter)
 {
        if (is_graph(iter->tr))
                graph_trace_open(iter);
-
+       else
+               iter->private = NULL;
 }
 
 static void irqsoff_trace_close(struct trace_iterator *iter)
index 330aee1..0469a04 100644 (file)
@@ -168,6 +168,8 @@ static void wakeup_trace_open(struct trace_iterator *iter)
 {
        if (is_graph(iter->tr))
                graph_trace_open(iter);
+       else
+               iter->private = NULL;
 }
 
 static void wakeup_trace_close(struct trace_iterator *iter)
index 02a8f40..800b420 100644 (file)
@@ -52,6 +52,7 @@
 #include <linux/sched/debug.h>
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
+#include <linux/delay.h>
 
 #include "workqueue_internal.h"
 
@@ -338,8 +339,10 @@ static cpumask_var_t *wq_numa_possible_cpumask;
  * Per-cpu work items which run for longer than the following threshold are
  * automatically considered CPU intensive and excluded from concurrency
  * management to prevent them from noticeably delaying other per-cpu work items.
+ * ULONG_MAX indicates that the user hasn't overridden it with a boot parameter.
+ * The actual value is initialized in wq_cpu_intensive_thresh_init().
  */
-static unsigned long wq_cpu_intensive_thresh_us = 10000;
+static unsigned long wq_cpu_intensive_thresh_us = ULONG_MAX;
 module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, 0644);
 
 static bool wq_disable_numa;
@@ -6513,6 +6516,42 @@ void __init workqueue_init_early(void)
               !system_freezable_power_efficient_wq);
 }
 
+static void __init wq_cpu_intensive_thresh_init(void)
+{
+       unsigned long thresh;
+       unsigned long bogo;
+
+       /* if the user set it to a specific value, keep it */
+       if (wq_cpu_intensive_thresh_us != ULONG_MAX)
+               return;
+
+       /*
+        * The default of 10ms is derived from the fact that most modern (as of
+        * 2023) processors can do a lot in 10ms and that it's just below what
+        * most consider human-perceivable. However, the kernel also runs on a
+        * lot slower CPUs including microcontrollers where the threshold is way
+        * too low.
+        *
+        * Let's scale up the threshold to at most 1 second if BogoMIPS is below 4000.
+        * This is by no means accurate but it doesn't have to be. The mechanism
+        * is still useful even when the threshold is fully scaled up. Also, as
+        * the reports would usually be applicable to everyone, some machines
+        * operating on longer thresholds won't significantly diminish their
+        * usefulness.
+        */
+       thresh = 10 * USEC_PER_MSEC;
+
+       /* see init/calibrate.c for lpj -> BogoMIPS calculation */
+       bogo = max_t(unsigned long, loops_per_jiffy / 500000 * HZ, 1);
+       if (bogo < 4000)
+               thresh = min_t(unsigned long, thresh * 4000 / bogo, USEC_PER_SEC);
+
+       pr_debug("wq_cpu_intensive_thresh: lpj=%lu BogoMIPS=%lu thresh_us=%lu\n",
+                loops_per_jiffy, bogo, thresh);
+
+       wq_cpu_intensive_thresh_us = thresh;
+}
+
 /**
  * workqueue_init - bring workqueue subsystem fully online
  *
@@ -6528,6 +6567,8 @@ void __init workqueue_init(void)
        struct worker_pool *pool;
        int cpu, bkt;
 
+       wq_cpu_intensive_thresh_init();
+
        /*
         * It'd be simpler to initialize NUMA in workqueue_init_early() but
         * CPU to node mapping may not be available that early on some
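
A standalone recalculation of the scaling wq_cpu_intensive_thresh_init() above performs: start from the 10ms default, estimate BogoMIPS as loops_per_jiffy / 500000 * HZ, and when that falls below 4000 scale the threshold up, capped at one second. The lpj/HZ inputs in main() are made-up examples:

/* Illustrative only -- not kernel code. */
#include <stdio.h>

#define USEC_PER_MSEC 1000UL
#define USEC_PER_SEC  1000000UL

static unsigned long intensive_thresh_us(unsigned long loops_per_jiffy,
					 unsigned long hz)
{
	unsigned long thresh = 10 * USEC_PER_MSEC;	/* default: 10ms */
	unsigned long bogo = loops_per_jiffy / 500000 * hz;

	if (bogo < 1)
		bogo = 1;
	if (bogo < 4000) {
		thresh = thresh * 4000 / bogo;
		if (thresh > USEC_PER_SEC)
			thresh = USEC_PER_SEC;		/* cap at 1 second */
	}
	return thresh;
}

int main(void)
{
	/* made-up examples: a fast desktop core vs. a slow embedded core */
	printf("fast: %lu us\n", intensive_thresh_us(24000000, 250));
	printf("slow: %lu us\n", intensive_thresh_us(600000, 100));
	return 0;
}
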
index fbc89ba..d679851 100644 (file)
@@ -1200,7 +1200,7 @@ config WQ_CPU_INTENSIVE_REPORT
        help
          Say Y here to enable reporting of concurrency-managed per-cpu work
          items that hog CPUs for longer than
-         workqueue.cpu_intensive_threshold_us. Workqueue automatically
+         workqueue.cpu_intensive_thresh_us. Workqueue automatically
          detects and excludes them from concurrency management to prevent
          them from stalling other per-cpu work items. Occasional
          triggering may not necessarily indicate a problem. Repeated
index 0d3a686..fb8c0c5 100644 (file)
@@ -28,36 +28,16 @@ int __weak __clzsi2(int val)
 }
 EXPORT_SYMBOL(__clzsi2);
 
-int __weak __clzdi2(long val);
-int __weak __ctzdi2(long val);
-#if BITS_PER_LONG == 32
-
-int __weak __clzdi2(long val)
+int __weak __clzdi2(u64 val);
+int __weak __clzdi2(u64 val)
 {
-       return 32 - fls((int)val);
+       return 64 - fls64(val);
 }
 EXPORT_SYMBOL(__clzdi2);
 
-int __weak __ctzdi2(long val)
+int __weak __ctzdi2(u64 val);
+int __weak __ctzdi2(u64 val)
 {
-       return __ffs((u32)val);
+       return __ffs64(val);
 }
 EXPORT_SYMBOL(__ctzdi2);
-
-#elif BITS_PER_LONG == 64
-
-int __weak __clzdi2(long val)
-{
-       return 64 - fls64((u64)val);
-}
-EXPORT_SYMBOL(__clzdi2);
-
-int __weak __ctzdi2(long val)
-{
-       return __ffs64((u64)val);
-}
-EXPORT_SYMBOL(__ctzdi2);
-
-#else
-#error BITS_PER_LONG not 32 or 64
-#endif
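
The rewritten helpers above now always operate on a 64-bit value via fls64()/__ffs64(), regardless of BITS_PER_LONG. A userspace sketch checking that 64 - fls64(x) and the lowest-set-bit index agree with the compiler's count-leading/trailing-zero builtins for a non-zero value; fls64() and ffs64_zero_based() here are portable stand-ins, not the kernel implementations:

/* Illustrative only -- not kernel code. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Index of the highest set bit, 1-based; 0 if no bit is set. */
static int fls64(uint64_t x)
{
	return x ? 64 - __builtin_clzll(x) : 0;
}

/* Index of the lowest set bit, 0-based; x must be non-zero. */
static int ffs64_zero_based(uint64_t x)
{
	return __builtin_ctzll(x);
}

static int clzdi2(uint64_t x) { return 64 - fls64(x); }
static int ctzdi2(uint64_t x) { return ffs64_zero_based(x); }

int main(void)
{
	uint64_t v = 0x0000010000000800ULL;

	assert(clzdi2(v) == __builtin_clzll(v));
	assert(ctzdi2(v) == __builtin_ctzll(v));
	printf("clz=%d ctz=%d\n", clzdi2(v), ctzdi2(v));
	return 0;
}
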
index 4dd73cf..f723024 100644 (file)
@@ -4265,6 +4265,10 @@ static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
  * mas_wr_append: Attempt to append
  * @wr_mas: the maple write state
  *
+ * This is currently unsafe in rcu mode since the end of the node may be cached
+ * by readers while the node contents may be updated which could result in
+ * inaccurate information.
+ *
  * Return: True if appended, false otherwise
  */
 static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
@@ -4274,6 +4278,9 @@ static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
        struct ma_state *mas = wr_mas->mas;
        unsigned char node_pivots = mt_pivots[wr_mas->type];
 
+       if (mt_in_rcu(mas->tree))
+               return false;
+
        if (mas->offset != wr_mas->node_end)
                return false;
 
index 1a31065..976b9bd 100644 (file)
@@ -1136,7 +1136,6 @@ static void set_iter_tags(struct radix_tree_iter *iter,
 void __rcu **radix_tree_iter_resume(void __rcu **slot,
                                        struct radix_tree_iter *iter)
 {
-       slot++;
        iter->index = __radix_tree_iter_add(iter, 1);
        iter->next_index = iter->index;
        iter->tags = 0;
index e86231a..c65566b 100644 (file)
@@ -1148,7 +1148,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter,
 
 failed:
        while (sgtable->nents > sgtable->orig_nents)
-               put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
+               unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
        return res;
 }
 
index dbc9f86..eacca27 100644 (file)
@@ -912,11 +912,12 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
                /*
                 * Check if the pageblock has already been marked skipped.
-                * Only the aligned PFN is checked as the caller isolates
+                * Only the first PFN is checked as the caller isolates
                 * COMPACT_CLUSTER_MAX at a time so the second call must
                 * not falsely conclude that the block should be skipped.
                 */
-               if (!valid_page && pageblock_aligned(low_pfn)) {
+               if (!valid_page && (pageblock_aligned(low_pfn) ||
+                                   low_pfn == cc->zone->zone_start_pfn)) {
                        if (!isolation_suitable(cc, page)) {
                                low_pfn = end_pfn;
                                folio = NULL;
@@ -2002,7 +2003,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
                 * before making it "skip" so other compaction instances do
                 * not scan the same block.
                 */
-               if (pageblock_aligned(low_pfn) &&
+               if ((pageblock_aligned(low_pfn) ||
+                    low_pfn == cc->zone->zone_start_pfn) &&
                    !fast_find_block && !isolation_suitable(cc, page))
                        continue;
 
index 91cff7f..eb95809 100644 (file)
@@ -273,6 +273,7 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type,
                return NULL;
        filter->type = type;
        filter->matching = matching;
+       INIT_LIST_HEAD(&filter->list);
        return filter;
 }
 
index 2fcc973..e0e59d4 100644 (file)
@@ -386,6 +386,7 @@ out:
 static const struct mm_walk_ops damon_mkold_ops = {
        .pmd_entry = damon_mkold_pmd_entry,
        .hugetlb_entry = damon_mkold_hugetlb_entry,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -525,6 +526,7 @@ out:
 static const struct mm_walk_ops damon_young_ops = {
        .pmd_entry = damon_young_pmd_entry,
        .hugetlb_entry = damon_young_hugetlb_entry,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
index 76d222c..6e2f9e9 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -597,7 +597,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
        pte = ptep_get(ptep);
        if (!pte_present(pte))
                goto no_page;
-       if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+       if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
                goto no_page;
 
        page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
        if (likely(!pmd_trans_huge(pmdval)))
                return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 
-       if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+       if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
                return no_page_table(vma, flags);
 
        ptl = pmd_lock(mm, pmd);
@@ -851,6 +851,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
        if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
                return NULL;
 
+       /*
+        * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+        * to fail on PROT_NONE-mapped pages.
+        */
        page = follow_page_mask(vma, address, foll_flags, &ctx);
        if (ctx.pgmap)
                put_dev_pagemap(ctx.pgmap);
@@ -2227,6 +2231,13 @@ static bool is_valid_gup_args(struct page **pages, int *locked,
                gup_flags |= FOLL_UNLOCKABLE;
        }
 
+       /*
+        * For now, always trigger NUMA hinting faults. Some GUP users like
+        * KVM require the hint to be as the calling context of GUP is
+        * functionally similar to a memory reference from task context.
+        */
+       gup_flags |= FOLL_HONOR_NUMA_FAULT;
+
        /* FOLL_GET and FOLL_PIN are mutually exclusive. */
        if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
                         (FOLL_PIN | FOLL_GET)))
@@ -2551,7 +2562,14 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                struct page *page;
                struct folio *folio;
 
-               if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+               /*
+                * Always fallback to ordinary GUP on PROT_NONE-mapped pages:
+                * pte_access_permitted() should reject these pages
+                * either way: otherwise, GUP-fast might succeed in
+                * cases where ordinary GUP would fail due to VMA access
+                * permissions.
+                */
+               if (pte_protnone(pte))
                        goto pte_unmap;
 
                if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2970,8 +2988,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
 
                if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
                             pmd_devmap(pmd))) {
-                       if (pmd_protnone(pmd) &&
-                           !gup_can_follow_protnone(flags))
+                       /* See gup_pte_range() */
+                       if (pmd_protnone(pmd))
                                return 0;
 
                        if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3151,7 +3169,7 @@ static int internal_get_user_pages_fast(unsigned long start,
        if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
                                       FOLL_FORCE | FOLL_PIN | FOLL_GET |
                                       FOLL_FAST_ONLY | FOLL_NOFAULT |
-                                      FOLL_PCI_P2PDMA)))
+                                      FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
                return -EINVAL;
 
        if (gup_flags & FOLL_PIN)
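
The mm/gup.c hunks above make NUMA hinting opt-in: is_valid_gup_args() sets FOLL_HONOR_NUMA_FAULT for ordinary GUP, follow_page() deliberately leaves it clear, and GUP-fast simply bails out on any protnone entry. The per-VMA check those call sites use boils down to a helper along these lines (a minimal sketch assuming vma_is_accessible() is the accessibility test; the upstream body may differ in detail):

static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
					   unsigned int flags)
{
	/* Callers that opted out of NUMA hinting may follow protnone PTEs. */
	if (!(flags & FOLL_HONOR_NUMA_FAULT))
		return true;

	/*
	 * In an inaccessible (PROT_NONE) VMA the protnone bit is real
	 * protection rather than a NUMA hint, so it can still be followed;
	 * in an accessible VMA a NUMA hinting fault must be taken first.
	 */
	return !vma_is_accessible(vma);
}
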
index 855e25e..277ddca 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -562,6 +562,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
        .pte_hole       = hmm_vma_walk_hole,
        .hugetlb_entry  = hmm_vma_walk_hugetlb_entry,
        .test_walk      = hmm_vma_walk_test,
+       .walk_lock      = PGWALK_RDLOCK,
 };
 
 /**
index eb36783..164d223 100644 (file)
@@ -1467,8 +1467,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
        if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
                return ERR_PTR(-EFAULT);
 
-       /* Full NUMA hinting faults to serialise migration in fault paths */
-       if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
+       if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
                return NULL;
 
        if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
@@ -1613,7 +1612,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
         * If other processes are mapping this folio, we couldn't discard
         * the folio unless they all do MADV_FREE so let's skip the folio.
         */
-       if (folio_mapcount(folio) != 1)
+       if (folio_estimated_sharers(folio) != 1)
                goto out;
 
        if (!folio_trylock(folio))
index 64a3239..6da626b 100644 (file)
@@ -1579,9 +1579,37 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
                                                unsigned int order) { }
 #endif
 
+static inline void __clear_hugetlb_destructor(struct hstate *h,
+                                               struct folio *folio)
+{
+       lockdep_assert_held(&hugetlb_lock);
+
+       /*
+        * Very subtle
+        *
+        * For non-gigantic pages set the destructor to the normal compound
+        * page dtor.  This is needed in case someone takes an additional
+        * temporary ref to the page, and freeing is delayed until they drop
+        * their reference.
+        *
+        * For gigantic pages set the destructor to the null dtor.  This
+        * destructor will never be called.  Before freeing the gigantic
+        * page destroy_compound_gigantic_folio will turn the folio into a
+        * simple group of pages.  After this the destructor does not
+        * apply.
+        *
+        */
+       if (hstate_is_gigantic(h))
+               folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
+       else
+               folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
+}
+
 /*
- * Remove hugetlb folio from lists, and update dtor so that the folio appears
- * as just a compound page.
+ * Remove hugetlb folio from lists.
+ * If vmemmap exists for the folio, update dtor so that the folio appears
+ * as just a compound page.  Otherwise, wait until after allocating vmemmap
+ * to update dtor.
  *
  * A reference is held on the folio, except in the case of demote.
  *
@@ -1612,31 +1640,19 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
        }
 
        /*
-        * Very subtle
-        *
-        * For non-gigantic pages set the destructor to the normal compound
-        * page dtor.  This is needed in case someone takes an additional
-        * temporary ref to the page, and freeing is delayed until they drop
-        * their reference.
-        *
-        * For gigantic pages set the destructor to the null dtor.  This
-        * destructor will never be called.  Before freeing the gigantic
-        * page destroy_compound_gigantic_folio will turn the folio into a
-        * simple group of pages.  After this the destructor does not
-        * apply.
-        *
-        * This handles the case where more than one ref is held when and
-        * after update_and_free_hugetlb_folio is called.
-        *
-        * In the case of demote we do not ref count the page as it will soon
-        * be turned into a page of smaller size.
+        * We can only clear the hugetlb destructor after allocating vmemmap
+        * pages.  Otherwise, someone (memory error handling) may try to write
+        * to tail struct pages.
+        */
+       if (!folio_test_hugetlb_vmemmap_optimized(folio))
+               __clear_hugetlb_destructor(h, folio);
+
+        /*
+         * In the case of demote we do not ref count the page as it will soon
+         * be turned into a page of smaller size.
         */
        if (!demote)
                folio_ref_unfreeze(folio, 1);
-       if (hstate_is_gigantic(h))
-               folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
-       else
-               folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
 
        h->nr_huge_pages--;
        h->nr_huge_pages_node[nid]--;
@@ -1705,6 +1721,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 {
        int i;
        struct page *subpage;
+       bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio);
 
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
                return;
@@ -1735,6 +1752,16 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
        if (unlikely(folio_test_hwpoison(folio)))
                folio_clear_hugetlb_hwpoison(folio);
 
+       /*
+        * If vmemmap pages were allocated above, then we need to clear the
+        * hugetlb destructor under the hugetlb lock.
+        */
+       if (clear_dtor) {
+               spin_lock_irq(&hugetlb_lock);
+               __clear_hugetlb_destructor(h, folio);
+               spin_unlock_irq(&hugetlb_lock);
+       }
+
        for (i = 0; i < pages_per_huge_page(h); i++) {
                subpage = folio_page(folio, i);
                subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
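
Taken together, the hugetlb hunks enforce one ordering rule on the free path: the compound destructor may only be cleared once the vmemmap for the tail pages is guaranteed to exist, because clearing it signals to the rest of the kernel (memory error handling in particular) that the tail struct pages are writable. A condensed sketch of the resulting sequence, using the names introduced above (not literal code, error handling elided):

/* Free-path ordering after the change (sketch). */
static void free_hugetlb_folio_sketch(struct hstate *h, struct folio *folio)
{
	bool deferred = folio_test_hugetlb_vmemmap_optimized(folio);

	spin_lock_irq(&hugetlb_lock);
	/* __remove_hugetlb_folio(): clear the dtor now only if the
	 * vmemmap for the tail pages is already fully present. */
	if (!deferred)
		__clear_hugetlb_destructor(h, folio);
	spin_unlock_irq(&hugetlb_lock);

	/* vmemmap pages are re-allocated here, outside the lock (may sleep). */

	if (deferred) {
		/* Tail struct pages exist now; safe to clear the dtor. */
		spin_lock_irq(&hugetlb_lock);
		__clear_hugetlb_destructor(h, folio);
		spin_unlock_irq(&hugetlb_lock);
	}
}
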
index a7d9e98..8ed127c 100644 (file)
@@ -924,6 +924,13 @@ int migrate_device_coherent_page(struct page *page);
 struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
 int __must_check try_grab_page(struct page *page, unsigned int flags);
 
+/*
+ * mm/huge_memory.c
+ */
+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+                                  unsigned long addr, pmd_t *pmd,
+                                  unsigned int flags);
+
 enum {
        /* mark page accessed */
        FOLL_TOUCH = 1 << 16,
@@ -998,6 +1005,16 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
                smp_rmb();
 
        /*
+        * During GUP-fast we might not get called on the head page for a
+        * hugetlb page that is mapped using cont-PTE, because GUP-fast does
+        * not work with the abstracted hugetlb PTEs that always point at the
+        * head page. For hugetlb, PageAnonExclusive only applies on the head
+        * page (as it cannot be partially COW-shared), so look up the head page.
+        */
+       if (unlikely(!PageHead(page) && PageHuge(page)))
+               page = compound_head(page);
+
+       /*
         * Note that PageKsm() pages cannot be exclusive, and consequently,
         * cannot get pinned.
         */
index ba26635..d7b5b95 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -455,6 +455,12 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
 
 static const struct mm_walk_ops break_ksm_ops = {
        .pmd_entry = break_ksm_pmd_entry,
+       .walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+       .pmd_entry = break_ksm_pmd_entry,
+       .walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -470,16 +476,17 @@ static const struct mm_walk_ops break_ksm_ops = {
  * of the process that owns 'vma'.  We also do not want to enforce
  * protection keys here anyway.
  */
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
 {
        vm_fault_t ret = 0;
+       const struct mm_walk_ops *ops = lock_vma ?
+                               &break_ksm_lock_vma_ops : &break_ksm_ops;
 
        do {
                int ksm_page;
 
                cond_resched();
-               ksm_page = walk_page_range_vma(vma, addr, addr + 1,
-                                              &break_ksm_ops, NULL);
+               ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
                if (WARN_ON_ONCE(ksm_page < 0))
                        return ksm_page;
                if (!ksm_page)
@@ -565,7 +572,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
        mmap_read_lock(mm);
        vma = find_mergeable_vma(mm, addr);
        if (vma)
-               break_ksm(vma, addr);
+               break_ksm(vma, addr, false);
        mmap_read_unlock(mm);
 }
 
@@ -871,7 +878,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
  * in cmp_and_merge_page on one of the rmap_items we would be removing.
  */
 static int unmerge_ksm_pages(struct vm_area_struct *vma,
-                            unsigned long start, unsigned long end)
+                            unsigned long start, unsigned long end, bool lock_vma)
 {
        unsigned long addr;
        int err = 0;
@@ -882,7 +889,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
                if (signal_pending(current))
                        err = -ERESTARTSYS;
                else
-                       err = break_ksm(vma, addr);
+                       err = break_ksm(vma, addr, lock_vma);
        }
        return err;
 }
@@ -1029,7 +1036,7 @@ static int unmerge_and_remove_all_rmap_items(void)
                        if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
                                continue;
                        err = unmerge_ksm_pages(vma,
-                                               vma->vm_start, vma->vm_end);
+                                               vma->vm_start, vma->vm_end, false);
                        if (err)
                                goto error;
                }
@@ -2530,7 +2537,7 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
                return 0;
 
        if (vma->anon_vma) {
-               err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+               err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
                if (err)
                        return err;
        }
@@ -2668,7 +2675,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
                        return 0;               /* just ignore the advice */
 
                if (vma->anon_vma) {
-                       err = unmerge_ksm_pages(vma, start, end);
+                       err = unmerge_ksm_pages(vma, start, end, true);
                        if (err)
                                return err;
                }
@@ -2784,6 +2791,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
                        anon_vma->root == vma->anon_vma->root) {
                return page;            /* still no need to copy it */
        }
+       if (PageHWPoison(page))
+               return ERR_PTR(-EHWPOISON);
        if (!PageUptodate(page))
                return page;            /* let do_swap_page report the error */
 
index 886f060..ec30f48 100644 (file)
@@ -233,6 +233,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops swapin_walk_ops = {
        .pmd_entry              = swapin_walk_pmd_entry,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 static void shmem_swapin_range(struct vm_area_struct *vma,
@@ -383,7 +384,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                folio = pfn_folio(pmd_pfn(orig_pmd));
 
                /* Do not interfere with other mappings of this folio */
-               if (folio_mapcount(folio) != 1)
+               if (folio_estimated_sharers(folio) != 1)
                        goto huge_unlock;
 
                if (pageout_anon_only_filter && !folio_test_anon(folio))
@@ -457,7 +458,7 @@ regular_folio:
                if (folio_test_large(folio)) {
                        int err;
 
-                       if (folio_mapcount(folio) != 1)
+                       if (folio_estimated_sharers(folio) != 1)
                                break;
                        if (pageout_anon_only_filter && !folio_test_anon(folio))
                                break;
@@ -534,6 +535,7 @@ regular_folio:
 
 static const struct mm_walk_ops cold_walk_ops = {
        .pmd_entry = madvise_cold_or_pageout_pte_range,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 static void madvise_cold_page_range(struct mmu_gather *tlb,
@@ -678,7 +680,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                if (folio_test_large(folio)) {
                        int err;
 
-                       if (folio_mapcount(folio) != 1)
+                       if (folio_estimated_sharers(folio) != 1)
                                break;
                        if (!folio_trylock(folio))
                                break;
@@ -757,6 +759,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops madvise_free_walk_ops = {
        .pmd_entry              = madvise_free_pte_range,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
index e8ca4bd..315fd5f 100644 (file)
@@ -6024,6 +6024,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops precharge_walk_ops = {
        .pmd_entry      = mem_cgroup_count_precharge_pte_range,
+       .walk_lock      = PGWALK_RDLOCK,
 };
 
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6303,6 +6304,7 @@ put:                      /* get_mctgt_type() gets & locks the page */
 
 static const struct mm_walk_ops charge_walk_ops = {
        .pmd_entry      = mem_cgroup_move_charge_pte_range,
+       .walk_lock      = PGWALK_RDLOCK,
 };
 
 static void mem_cgroup_move_charge(void)
index ece5d48..fe121fd 100644 (file)
@@ -831,6 +831,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 static const struct mm_walk_ops hwp_walk_ops = {
        .pmd_entry = hwpoison_pte_range,
        .hugetlb_entry = hwpoison_hugetlb_range,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -2466,7 +2467,7 @@ int unpoison_memory(unsigned long pfn)
 {
        struct folio *folio;
        struct page *p;
-       int ret = -EBUSY;
+       int ret = -EBUSY, ghp;
        unsigned long count = 1;
        bool huge = false;
        static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -2499,6 +2500,13 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
+       if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+               goto unlock_mutex;
+
+       /*
+        * Note that folio->_mapcount is overloaded in SLAB, so the simple
+        * folio_mapped() test must come after the folio_test_slab() check.
+        */
        if (folio_mapped(folio)) {
                unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
                                 pfn, &unpoison_rs);
@@ -2511,32 +2519,28 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
-       if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
-               goto unlock_mutex;
-
-       ret = get_hwpoison_page(p, MF_UNPOISON);
-       if (!ret) {
+       ghp = get_hwpoison_page(p, MF_UNPOISON);
+       if (!ghp) {
                if (PageHuge(p)) {
                        huge = true;
                        count = folio_free_raw_hwp(folio, false);
-                       if (count == 0) {
-                               ret = -EBUSY;
+                       if (count == 0)
                                goto unlock_mutex;
-                       }
                }
                ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY;
-       } else if (ret < 0) {
-               if (ret == -EHWPOISON) {
+       } else if (ghp < 0) {
+               if (ghp == -EHWPOISON) {
                        ret = put_page_back_buddy(p) ? 0 : -EBUSY;
-               } else
+               } else {
+                       ret = ghp;
                        unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
                                         pfn, &unpoison_rs);
+               }
        } else {
                if (PageHuge(p)) {
                        huge = true;
                        count = folio_free_raw_hwp(folio, false);
                        if (count == 0) {
-                               ret = -EBUSY;
                                folio_put(folio);
                                goto unlock_mutex;
                        }
@@ -2737,10 +2741,13 @@ retry:
        if (ret > 0) {
                ret = soft_offline_in_use_page(page);
        } else if (ret == 0) {
-               if (!page_handle_poison(page, true, false) && try_again) {
-                       try_again = false;
-                       flags &= ~MF_COUNT_INCREASED;
-                       goto retry;
+               if (!page_handle_poison(page, true, false)) {
+                       if (try_again) {
+                               try_again = false;
+                               flags &= ~MF_COUNT_INCREASED;
+                               goto retry;
+                       }
+                       ret = -EBUSY;
                }
        }
 
index 603b2f4..cdc4d4c 100644 (file)
@@ -5257,11 +5257,8 @@ EXPORT_SYMBOL_GPL(handle_mm_fault);
 
 static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
 {
-       /* Even if this succeeds, make it clear we *might* have slept */
-       if (likely(mmap_read_trylock(mm))) {
-               might_sleep();
+       if (likely(mmap_read_trylock(mm)))
                return true;
-       }
 
        if (regs && !user_mode(regs)) {
                unsigned long ip = instruction_pointer(regs);
@@ -5705,6 +5702,9 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
        if (mmap_read_lock_killable(mm))
                return 0;
 
+       /* Untag the address before looking up the VMA */
+       addr = untagged_addr_remote(mm, addr);
+
        /* Avoid triggering the temporary warning in __get_user_pages */
        if (!vma_lookup(mm, addr) && !expand_stack(mm, addr))
                return 0;
index c53f8be..ec2eace 100644 (file)
@@ -718,6 +718,14 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
        .hugetlb_entry          = queue_folios_hugetlb,
        .pmd_entry              = queue_folios_pte_range,
        .test_walk              = queue_pages_test_walk,
+       .walk_lock              = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
+       .hugetlb_entry          = queue_folios_hugetlb,
+       .pmd_entry              = queue_folios_pte_range,
+       .test_walk              = queue_pages_test_walk,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 /*
@@ -738,7 +746,7 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
                nodemask_t *nodes, unsigned long flags,
-               struct list_head *pagelist)
+               struct list_head *pagelist, bool lock_vma)
 {
        int err;
        struct queue_pages qp = {
@@ -749,8 +757,10 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
                .end = end,
                .first = NULL,
        };
+       const struct mm_walk_ops *ops = lock_vma ?
+                       &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
 
-       err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+       err = walk_page_range(mm, start, end, ops, &qp);
 
        if (!qp.first)
                /* whole range in hole */
@@ -1078,7 +1088,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
        vma = find_vma(mm, 0);
        VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
        queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
-                       flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+                       flags | MPOL_MF_DISCONTIG_OK, &pagelist, false);
 
        if (!list_empty(&pagelist)) {
                err = migrate_pages(&pagelist, alloc_migration_target, NULL,
@@ -1321,12 +1331,8 @@ static long do_mbind(unsigned long start, unsigned long len,
         * Lock the VMAs before scanning for pages to migrate, to ensure we don't
         * miss a concurrently inserted page.
         */
-       vma_iter_init(&vmi, mm, start);
-       for_each_vma_range(vmi, vma, end)
-               vma_start_write(vma);
-
        ret = queue_pages_range(mm, start, end, nmask,
-                         flags | MPOL_MF_INVERT, &pagelist);
+                         flags | MPOL_MF_INVERT, &pagelist, true);
 
        if (ret < 0) {
                err = ret;
index 8365158..d5f4923 100644 (file)
@@ -279,6 +279,7 @@ next:
 static const struct mm_walk_ops migrate_vma_walk_ops = {
        .pmd_entry              = migrate_vma_collect_pmd,
        .pte_hole               = migrate_vma_collect_hole,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 /*
index b7f7a51..dad3622 100644 (file)
@@ -176,6 +176,7 @@ static const struct mm_walk_ops mincore_walk_ops = {
        .pmd_entry              = mincore_pte_range,
        .pte_hole               = mincore_unmapped_range,
        .hugetlb_entry          = mincore_hugetlb,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 /*
index 0a0c996..479e09d 100644 (file)
@@ -371,6 +371,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
 {
        static const struct mm_walk_ops mlock_walk_ops = {
                .pmd_entry = mlock_pte_range,
+               .walk_lock = PGWALK_WRLOCK_VERIFY,
        };
 
        /*
index 6f658d4..3aef134 100644 (file)
@@ -568,6 +568,7 @@ static const struct mm_walk_ops prot_none_walk_ops = {
        .pte_entry              = prot_none_pte_entry,
        .hugetlb_entry          = prot_none_hugetlb_entry,
        .test_walk              = prot_none_test,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 int
index 2022333..9b2d23f 100644 (file)
@@ -400,6 +400,33 @@ static int __walk_page_range(unsigned long start, unsigned long end,
        return err;
 }
 
+static inline void process_mm_walk_lock(struct mm_struct *mm,
+                                       enum page_walk_lock walk_lock)
+{
+       if (walk_lock == PGWALK_RDLOCK)
+               mmap_assert_locked(mm);
+       else
+               mmap_assert_write_locked(mm);
+}
+
+static inline void process_vma_walk_lock(struct vm_area_struct *vma,
+                                        enum page_walk_lock walk_lock)
+{
+#ifdef CONFIG_PER_VMA_LOCK
+       switch (walk_lock) {
+       case PGWALK_WRLOCK:
+               vma_start_write(vma);
+               break;
+       case PGWALK_WRLOCK_VERIFY:
+               vma_assert_write_locked(vma);
+               break;
+       case PGWALK_RDLOCK:
+               /* PGWALK_RDLOCK is handled by process_mm_walk_lock */
+               break;
+       }
+#endif
+}
+
 /**
  * walk_page_range - walk page table with caller specific callbacks
  * @mm:                mm_struct representing the target process of page table walk
@@ -459,7 +486,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
        if (!walk.mm)
                return -EINVAL;
 
-       mmap_assert_locked(walk.mm);
+       process_mm_walk_lock(walk.mm, ops->walk_lock);
 
        vma = find_vma(walk.mm, start);
        do {
@@ -474,6 +501,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
                        if (ops->pte_hole)
                                err = ops->pte_hole(start, next, -1, &walk);
                } else { /* inside vma */
+                       process_vma_walk_lock(vma, ops->walk_lock);
                        walk.vma = vma;
                        next = min(end, vma->vm_end);
                        vma = find_vma(mm, vma->vm_end);
@@ -549,7 +577,8 @@ int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
        if (start < vma->vm_start || end > vma->vm_end)
                return -EINVAL;
 
-       mmap_assert_locked(walk.mm);
+       process_mm_walk_lock(walk.mm, ops->walk_lock);
+       process_vma_walk_lock(vma, ops->walk_lock);
        return __walk_page_range(start, end, &walk);
 }
 
@@ -566,7 +595,8 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
        if (!walk.mm)
                return -EINVAL;
 
-       mmap_assert_locked(walk.mm);
+       process_mm_walk_lock(walk.mm, ops->walk_lock);
+       process_vma_walk_lock(vma, ops->walk_lock);
        return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
 }
 
index f5af4b9..d963c74 100644 (file)
@@ -806,14 +806,16 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
        XA_STATE(xas, &mapping->i_pages, start);
        struct page *page;
        unsigned long swapped = 0;
+       unsigned long max = end - 1;
 
        rcu_read_lock();
-       xas_for_each(&xas, page, end - 1) {
+       xas_for_each(&xas, page, max) {
                if (xas_retry(&xas, page))
                        continue;
                if (xa_is_value(page))
                        swapped++;
-
+               if (xas.xa_index == max)
+                       break;
                if (need_resched()) {
                        xas_pause(&xas);
                        cond_resched_rcu();
index 8e6dde6..b15112b 100644 (file)
@@ -1746,7 +1746,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        struct page *swapcache;
        spinlock_t *ptl;
        pte_t *pte, new_pte, old_pte;
-       bool hwposioned = false;
+       bool hwpoisoned = PageHWPoison(page);
        int ret = 1;
 
        swapcache = page;
@@ -1754,7 +1754,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        if (unlikely(!page))
                return -ENOMEM;
        else if (unlikely(PTR_ERR(page) == -EHWPOISON))
-               hwposioned = true;
+               hwpoisoned = true;
 
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),
@@ -1765,11 +1765,11 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 
        old_pte = ptep_get(pte);
 
-       if (unlikely(hwposioned || !PageUptodate(page))) {
+       if (unlikely(hwpoisoned || !PageUptodate(page))) {
                swp_entry_t swp_entry;
 
                dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
-               if (hwposioned) {
+               if (hwpoisoned) {
                        swp_entry = make_hwpoison_entry(swapcache);
                        page = swapcache;
                } else {
index 93cf99a..228a4a5 100644 (file)
@@ -2979,6 +2979,10 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
                free_vm_area(area);
                return NULL;
        }
+
+       flush_cache_vmap((unsigned long)area->addr,
+                        (unsigned long)area->addr + count * PAGE_SIZE);
+
        return area->addr;
 }
 EXPORT_SYMBOL_GPL(vmap_pfn);
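
The added flush_cache_vmap() mirrors what vmap() already does: on architectures with virtually-tagged caches the new mapping must be flushed before callers dereference it. From a caller's point of view nothing changes; a hedged usage sketch of vmap_pfn() (the PFN source and protection choice are illustrative):

#include <linux/vmalloc.h>

/* Map `count` PFNs that have no struct page into vmalloc space. */
static void *map_pfns_sketch(unsigned long *pfns, unsigned int count)
{
	void *vaddr = vmap_pfn(pfns, count, PAGE_KERNEL);

	/* vmap_pfn() returns NULL on failure; unmap later with vunmap(vaddr). */
	return vaddr;
}
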
index 1080209..2fe4a11 100644 (file)
@@ -4284,6 +4284,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
        static const struct mm_walk_ops mm_walk_ops = {
                .test_walk = should_skip_vma,
                .p4d_entry = walk_pud_range,
+               .walk_lock = PGWALK_RDLOCK,
        };
 
        int err;
@@ -4853,16 +4854,17 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
 
                spin_lock_irq(&pgdat->memcg_lru.lock);
 
-               VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+               if (hlist_nulls_unhashed(&lruvec->lrugen.list))
+                       goto unlock;
 
                gen = lruvec->lrugen.gen;
 
-               hlist_nulls_del_rcu(&lruvec->lrugen.list);
+               hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
                pgdat->memcg_lru.nr_memcgs[gen]--;
 
                if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
                        WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
-
+unlock:
                spin_unlock_irq(&pgdat->memcg_lru.lock);
        }
 }
@@ -5434,8 +5436,10 @@ restart:
        rcu_read_lock();
 
        hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
-               if (op)
+               if (op) {
                        lru_gen_rotate_memcg(lruvec, op);
+                       op = 0;
+               }
 
                mem_cgroup_put(memcg);
 
@@ -5443,7 +5447,7 @@ restart:
                memcg = lruvec_memcg(lruvec);
 
                if (!mem_cgroup_tryget(memcg)) {
-                       op = 0;
+                       lru_gen_release_memcg(memcg);
                        memcg = NULL;
                        continue;
                }
index 3f05797..32916d2 100644 (file)
@@ -1798,6 +1798,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
 
 static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 {
+       struct zs_pool *pool;
        struct zspage *zspage;
 
        /*
@@ -1807,9 +1808,10 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
        VM_BUG_ON_PAGE(PageIsolated(page), page);
 
        zspage = get_zspage(page);
-       migrate_write_lock(zspage);
+       pool = zspage->pool;
+       spin_lock(&pool->lock);
        inc_zspage_isolation(zspage);
-       migrate_write_unlock(zspage);
+       spin_unlock(&pool->lock);
 
        return true;
 }
@@ -1875,12 +1877,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
        kunmap_atomic(s_addr);
 
        replace_sub_page(class, zspage, newpage, page);
+       dec_zspage_isolation(zspage);
        /*
         * Since we complete the data copy and set up new zspage structure,
         * it's okay to release the pool's lock.
         */
        spin_unlock(&pool->lock);
-       dec_zspage_isolation(zspage);
        migrate_write_unlock(zspage);
 
        get_page(newpage);
@@ -1897,14 +1899,16 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 
 static void zs_page_putback(struct page *page)
 {
+       struct zs_pool *pool;
        struct zspage *zspage;
 
        VM_BUG_ON_PAGE(!PageIsolated(page), page);
 
        zspage = get_zspage(page);
-       migrate_write_lock(zspage);
+       pool = zspage->pool;
+       spin_lock(&pool->lock);
        dec_zspage_isolation(zspage);
-       migrate_write_unlock(zspage);
+       spin_unlock(&pool->lock);
 }
 
 static const struct movable_operations zsmalloc_mops = {
index acff565..1d70457 100644 (file)
@@ -505,7 +505,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
        struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
        struct batadv_elp_packet *elp_packet;
        struct batadv_hard_iface *primary_if;
-       struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+       struct ethhdr *ethhdr;
        bool res;
        int ret = NET_RX_DROP;
 
@@ -513,6 +513,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
        if (!res)
                goto free_skb;
 
+       ethhdr = eth_hdr(skb);
        if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
                goto free_skb;
 
index e710e9a..e503ee0 100644 (file)
@@ -123,8 +123,10 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
 {
        struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 
-       if (hard_iface->if_status != BATADV_IF_ACTIVE)
+       if (hard_iface->if_status != BATADV_IF_ACTIVE) {
+               kfree_skb(skb);
                return;
+       }
 
        batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
        batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
@@ -985,7 +987,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
 {
        struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
        struct batadv_ogm2_packet *ogm_packet;
-       struct ethhdr *ethhdr = eth_hdr(skb);
+       struct ethhdr *ethhdr;
        int ogm_offset;
        u8 *packet_pos;
        int ret = NET_RX_DROP;
@@ -999,6 +1001,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
        if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
                goto free_skb;
 
+       ethhdr = eth_hdr(skb);
        if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
                goto free_skb;
 
index 41c1ad3..24c9c0c 100644 (file)
@@ -630,7 +630,19 @@ out:
  */
 void batadv_update_min_mtu(struct net_device *soft_iface)
 {
-       soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
+       struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+       int limit_mtu;
+       int mtu;
+
+       mtu = batadv_hardif_min_mtu(soft_iface);
+
+       if (bat_priv->mtu_set_by_user)
+               limit_mtu = bat_priv->mtu_set_by_user;
+       else
+               limit_mtu = ETH_DATA_LEN;
+
+       mtu = min(mtu, limit_mtu);
+       dev_set_mtu(soft_iface, mtu);
 
        /* Check if the local translate table should be cleaned up to match a
         * new (and smaller) MTU.
index ad5714f..6efbc92 100644 (file)
@@ -495,7 +495,10 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
                attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
 
                atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+
+               rtnl_lock();
                batadv_update_min_mtu(bat_priv->soft_iface);
+               rtnl_unlock();
        }
 
        if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
index d3fdf82..85d00dc 100644 (file)
@@ -153,11 +153,14 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 
 static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
 {
+       struct batadv_priv *bat_priv = netdev_priv(dev);
+
        /* check ranges */
        if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
                return -EINVAL;
 
        dev->mtu = new_mtu;
+       bat_priv->mtu_set_by_user = new_mtu;
 
        return 0;
 }
index 36ca312..b95c367 100644 (file)
@@ -774,7 +774,6 @@ check_roaming:
                if (roamed_back) {
                        batadv_tt_global_free(bat_priv, tt_global,
                                              "Roaming canceled");
-                       tt_global = NULL;
                } else {
                        /* The global entry has to be marked as ROAMING and
                         * has to be kept for consistency purpose
index ca9449e..cf1a0ea 100644 (file)
@@ -1547,6 +1547,12 @@ struct batadv_priv {
        struct net_device *soft_iface;
 
        /**
+        * @mtu_set_by_user: MTU was set once by the user,
+        * protected by rtnl_lock
+        */
+       int mtu_set_by_user;
+
+       /**
         * @bat_counters: mesh internal traffic statistic counters (see
         *  batadv_counters)
         */
index 99770ed..f02b5d3 100644 (file)
@@ -188,12 +188,6 @@ static bool isotp_register_rxid(struct isotp_sock *so)
        return (isotp_bc_flags(so) == 0);
 }
 
-static bool isotp_register_txecho(struct isotp_sock *so)
-{
-       /* all modes but SF_BROADCAST register for tx echo skbs */
-       return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST);
-}
-
 static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
 {
        struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
@@ -1209,7 +1203,7 @@ static int isotp_release(struct socket *sock)
        lock_sock(sk);
 
        /* remove current filters & unregister */
-       if (so->bound && isotp_register_txecho(so)) {
+       if (so->bound) {
                if (so->ifindex) {
                        struct net_device *dev;
 
@@ -1332,14 +1326,12 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
                can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
                                isotp_rcv, sk, "isotp", sk);
 
-       if (isotp_register_txecho(so)) {
-               /* no consecutive frame echo skb in flight */
-               so->cfecho = 0;
+       /* no consecutive frame echo skb in flight */
+       so->cfecho = 0;
 
-               /* register for echo skb's */
-               can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
-                               isotp_rcv_echo, sk, "isotpe", sk);
-       }
+       /* register for echo skb's */
+       can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
+                       isotp_rcv_echo, sk, "isotpe", sk);
 
        dev_put(dev);
 
@@ -1560,7 +1552,7 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
        case NETDEV_UNREGISTER:
                lock_sock(sk);
                /* remove current filters & unregister */
-               if (so->bound && isotp_register_txecho(so)) {
+               if (so->bound) {
                        if (isotp_register_rxid(so))
                                can_rx_unregister(dev_net(dev), dev, so->rxid,
                                                  SINGLE_MASK(so->rxid),
index e10f593..d50c3f3 100644 (file)
@@ -85,6 +85,7 @@ struct raw_sock {
        int bound;
        int ifindex;
        struct net_device *dev;
+       netdevice_tracker dev_tracker;
        struct list_head notifier;
        int loopback;
        int recv_own_msgs;
@@ -285,8 +286,10 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
        case NETDEV_UNREGISTER:
                lock_sock(sk);
                /* remove current filters & unregister */
-               if (ro->bound)
+               if (ro->bound) {
                        raw_disable_allfilters(dev_net(dev), dev, sk);
+                       netdev_put(dev, &ro->dev_tracker);
+               }
 
                if (ro->count > 1)
                        kfree(ro->filter);
@@ -391,10 +394,12 @@ static int raw_release(struct socket *sock)
 
        /* remove current filters & unregister */
        if (ro->bound) {
-               if (ro->dev)
+               if (ro->dev) {
                        raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
-               else
+                       netdev_put(ro->dev, &ro->dev_tracker);
+               } else {
                        raw_disable_allfilters(sock_net(sk), NULL, sk);
+               }
        }
 
        if (ro->count > 1)
@@ -445,10 +450,10 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
                        goto out;
                }
                if (dev->type != ARPHRD_CAN) {
-                       dev_put(dev);
                        err = -ENODEV;
-                       goto out;
+                       goto out_put_dev;
                }
+
                if (!(dev->flags & IFF_UP))
                        notify_enetdown = 1;
 
@@ -456,7 +461,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 
                /* filters set by default/setsockopt */
                err = raw_enable_allfilters(sock_net(sk), dev, sk);
-               dev_put(dev);
+               if (err)
+                       goto out_put_dev;
+
        } else {
                ifindex = 0;
 
@@ -467,18 +474,28 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
        if (!err) {
                if (ro->bound) {
                        /* unregister old filters */
-                       if (ro->dev)
+                       if (ro->dev) {
                                raw_disable_allfilters(dev_net(ro->dev),
                                                       ro->dev, sk);
-                       else
+                               /* drop reference to old ro->dev */
+                               netdev_put(ro->dev, &ro->dev_tracker);
+                       } else {
                                raw_disable_allfilters(sock_net(sk), NULL, sk);
+                       }
                }
                ro->ifindex = ifindex;
                ro->bound = 1;
+               /* bind() ok -> hold a reference for new ro->dev */
                ro->dev = dev;
+               if (ro->dev)
+                       netdev_hold(ro->dev, &ro->dev_tracker, GFP_KERNEL);
        }
 
- out:
+out_put_dev:
+       /* remove potential reference from dev_get_by_index() */
+       if (dev)
+               dev_put(dev);
+out:
        release_sock(sk);
        rtnl_unlock();
 
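The raw socket hunks switch from a transient dev_get_by_index()/dev_put() pair to a tracked reference that lives as long as the binding, so release(), rebinding and NETDEV_UNREGISTER all drop it against the same tracker. The underlying pattern, reduced to a sketch (the struct and helpers here are illustrative, not CAN code):

#include <linux/netdevice.h>

struct tracked_binding {
	struct net_device *dev;
	netdevice_tracker dev_tracker;
};

static void binding_set_dev(struct tracked_binding *b, struct net_device *dev)
{
	/* Take a long-lived, leak-trackable reference on the device. */
	b->dev = dev;
	if (dev)
		netdev_hold(dev, &b->dev_tracker, GFP_KERNEL);
}

static void binding_clear_dev(struct tracked_binding *b)
{
	/* Drop it with the same tracker; CONFIG_NET_DEV_REFCNT_TRACKER
	 * flags mismatched hold/put pairs. */
	if (b->dev)
		netdev_put(b->dev, &b->dev_tracker);
	b->dev = NULL;
}
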
index 06ba0e5..28a5959 100644 (file)
@@ -4116,12 +4116,6 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
        if (unlikely(data_end > data_hard_end))
                return -EINVAL;
 
-       /* ALL drivers MUST init xdp->frame_sz, chicken check below */
-       if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
-               WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
-               return -EINVAL;
-       }
-
        if (unlikely(data_end < xdp->data + ETH_HLEN))
                return -EINVAL;
 
index aef25aa..00c94d9 100644 (file)
@@ -2268,13 +2268,27 @@ out_err:
        return err;
 }
 
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
-                       struct netlink_ext_ack *exterr)
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+                            struct netlink_ext_ack *exterr)
 {
-       return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy,
+       const struct ifinfomsg *ifmp;
+       const struct nlattr *attrs;
+       size_t len;
+
+       ifmp = nla_data(nla_peer);
+       attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg);
+       len = nla_len(nla_peer) - sizeof(struct ifinfomsg);
+
+       if (ifmp->ifi_index < 0) {
+               NL_SET_ERR_MSG_ATTR(exterr, nla_peer,
+                                   "ifindex can't be negative");
+               return -EINVAL;
+       }
+
+       return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy,
                                    exterr);
 }
-EXPORT_SYMBOL(rtnl_nla_parse_ifla);
+EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg);
 
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
@@ -3547,6 +3561,9 @@ replay:
        if (ifm->ifi_index > 0) {
                link_specified = true;
                dev = __dev_get_by_index(net, ifm->ifi_index);
+       } else if (ifm->ifi_index < 0) {
+               NL_SET_ERR_MSG(extack, "ifindex can't be negative");
+               return -EINVAL;
        } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) {
                link_specified = true;
                dev = rtnl_dev_get(net, tb);
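
rtnl_nla_parse_ifinfomsg() replaces rtnl_nla_parse_ifla() for the "struct ifinfomsg + IFLA_* attributes" layout used by peer-creating link types (veth, vxcan, ...), skipping the embedded header and rejecting a negative ifi_index in one place. A caller-side sketch, assuming it runs inside a ->newlink() handler where data[] and extack are available (variable names are illustrative):

struct nlattr *peer_tb[IFLA_MAX + 1];
struct nlattr *peer = data[VETH_INFO_PEER];	/* illustrative source */
int err;

err = rtnl_nla_parse_ifinfomsg(peer_tb, peer, extack);
if (err < 0)
	return err;
/* peer_tb[] now holds the peer device's IFLA_* attributes. */
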
index a29508e..ef1a2eb 100644 (file)
@@ -1120,13 +1120,19 @@ static void sk_psock_strp_data_ready(struct sock *sk)
 
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
 {
+       int ret;
+
        static const struct strp_callbacks cb = {
                .rcv_msg        = sk_psock_strp_read,
                .read_sock_done = sk_psock_strp_read_done,
                .parse_msg      = sk_psock_strp_parse,
        };
 
-       return strp_init(&psock->strp, sk, &cb);
+       ret = strp_init(&psock->strp, sk, &cb);
+       if (!ret)
+               sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
+
+       return ret;
 }
 
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
@@ -1154,7 +1160,7 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 static void sk_psock_done_strp(struct sk_psock *psock)
 {
        /* Parser has been stopped */
-       if (psock->progs.stream_parser)
+       if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
                strp_done(&psock->strp);
 }
 #else
index 6d4f28e..c9cffb7 100644 (file)
@@ -1778,7 +1778,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
                spin_unlock(&sk->sk_peer_lock);
 
                if (!peer_pid)
-                       return -ESRCH;
+                       return -ENODATA;
 
                pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
                put_pid(peer_pid);
@@ -3159,7 +3159,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
                mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
 
-       if (sk_under_memory_pressure(sk) &&
+       if (sk_under_global_memory_pressure(sk) &&
            (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
                sk_leave_memory_pressure(sk);
 }
index 08ab108..8f07fea 100644 (file)
@@ -146,13 +146,13 @@ static void sock_map_del_link(struct sock *sk,
        list_for_each_entry_safe(link, tmp, &psock->link, list) {
                if (link->link_raw == link_raw) {
                        struct bpf_map *map = link->map;
-                       struct bpf_stab *stab = container_of(map, struct bpf_stab,
-                                                            map);
-                       if (psock->saved_data_ready && stab->progs.stream_parser)
+                       struct sk_psock_progs *progs = sock_map_progs(map);
+
+                       if (psock->saved_data_ready && progs->stream_parser)
                                strp_stop = true;
-                       if (psock->saved_data_ready && stab->progs.stream_verdict)
+                       if (psock->saved_data_ready && progs->stream_verdict)
                                verdict_stop = true;
-                       if (psock->saved_data_ready && stab->progs.skb_verdict)
+                       if (psock->saved_data_ready && progs->skb_verdict)
                                verdict_stop = true;
                        list_del(&link->list);
                        sk_psock_free_link(link);
index fa80793..a545ad7 100644 (file)
@@ -130,7 +130,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                                                    inet->inet_daddr,
                                                    inet->inet_sport,
                                                    inet->inet_dport);
-       inet->inet_id = get_random_u16();
+       atomic_set(&inet->inet_id, get_random_u16());
 
        err = dccp_connect(sk);
        rt = NULL;
@@ -432,7 +432,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
        RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
        newinet->mc_index  = inet_iif(skb);
        newinet->mc_ttl    = ip_hdr(skb)->ttl;
-       newinet->inet_id   = get_random_u16();
+       atomic_set(&newinet->inet_id, get_random_u16());
 
        if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
                goto put_and_exit;
index b8a2473..fd2eb14 100644 (file)
@@ -187,7 +187,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 
        /* And store cached results */
        icsk->icsk_pmtu_cookie = pmtu;
-       dp->dccps_mss_cache = cur_mps;
+       WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
 
        return cur_mps;
 }
index f331e59..fcc5c9d 100644 (file)
@@ -315,11 +315,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
 __poll_t dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
 {
-       __poll_t mask;
        struct sock *sk = sock->sk;
+       __poll_t mask;
+       u8 shutdown;
+       int state;
 
        sock_poll_wait(file, sock, wait);
-       if (sk->sk_state == DCCP_LISTEN)
+
+       state = inet_sk_state_load(sk);
+       if (state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);
 
        /* Socket is not locked. We are protected from async events
@@ -328,20 +332,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
         */
 
        mask = 0;
-       if (sk->sk_err)
+       if (READ_ONCE(sk->sk_err))
                mask = EPOLLERR;
+       shutdown = READ_ONCE(sk->sk_shutdown);
 
-       if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
+       if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
                mask |= EPOLLHUP;
-       if (sk->sk_shutdown & RCV_SHUTDOWN)
+       if (shutdown & RCV_SHUTDOWN)
                mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 
        /* Connected? */
-       if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
+       if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= EPOLLIN | EPOLLRDNORM;
 
-               if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+               if (!(shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_is_writeable(sk)) {
                                mask |= EPOLLOUT | EPOLLWRNORM;
                        } else {  /* send SIGIO later */
@@ -359,7 +364,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
        }
        return mask;
 }
-
 EXPORT_SYMBOL_GPL(dccp_poll);
 
 int dccp_ioctl(struct sock *sk, int cmd, int *karg)
@@ -630,7 +634,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
-               val = dp->dccps_mss_cache;
+               val = READ_ONCE(dp->dccps_mss_cache);
                break;
        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
@@ -739,7 +743,7 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        trace_dccp_probe(sk, len);
 
-       if (len > dp->dccps_mss_cache)
+       if (len > READ_ONCE(dp->dccps_mss_cache))
                return -EMSGSIZE;
 
        lock_sock(sk);
@@ -772,6 +776,12 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                goto out_discard;
        }
 
+       /* We need to check dccps_mss_cache after socket is locked. */
+       if (len > dp->dccps_mss_cache) {
+               rc = -EMSGSIZE;
+               goto out_discard;
+       }
+
        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_from_msg(skb_put(skb, len), msg, len);
        if (rc != 0)
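
dccps_mss_cache is updated under the socket lock in dccp_sync_mss() but read locklessly in poll, getsockopt and the sendmsg fast path, hence the WRITE_ONCE()/READ_ONCE() pairing above and the re-check after lock_sock() in sendmsg. The same annotation pattern in isolation (a generic sketch, not DCCP code):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(limit_lock);
static int shared_limit;	/* written under limit_lock, read locklessly */

static void update_limit(int new_limit)
{
	spin_lock(&limit_lock);
	WRITE_ONCE(shared_limit, new_limit);	/* store as one untorn write */
	spin_unlock(&limit_lock);
}

static bool over_limit(int len)
{
	/* Single annotated load on the lockless fast path. */
	return len > READ_ONCE(shared_limit);
}
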
index 1f00f87..bfed792 100644 (file)
@@ -6704,6 +6704,7 @@ void devlink_notify_unregister(struct devlink *devlink)
        struct devlink_param_item *param_item;
        struct devlink_trap_item *trap_item;
        struct devlink_port *devlink_port;
+       struct devlink_linecard *linecard;
        struct devlink_rate *rate_node;
        struct devlink_region *region;
        unsigned long port_index;
@@ -6732,6 +6733,8 @@ void devlink_notify_unregister(struct devlink *devlink)
 
        xa_for_each(&devlink->ports, port_index, devlink_port)
                devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+       list_for_each_entry_reverse(linecard, &devlink->linecard_list, list)
+               devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
        devlink_notify(devlink, DEVLINK_CMD_DEL);
 }
 
index 9b2ca2f..02736b8 100644 (file)
@@ -340,7 +340,7 @@ lookup_protocol:
        else
                inet->pmtudisc = IP_PMTUDISC_WANT;
 
-       inet->inet_id = 0;
+       atomic_set(&inet->inet_id, 0);
 
        sock_init_data(sock, sk);
 
index 4d1af0c..cb5dbee 100644 (file)
@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
        reuseport_has_conns_set(sk);
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
-       inet->inet_id = get_random_u16();
+       atomic_set(&inet->inet_id, get_random_u16());
 
        sk_dst_set(sk, &rt->dst);
        err = 0;
index 92c02c8..586b1b3 100644 (file)
@@ -224,7 +224,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
                .un.frag.__unused       = 0,
                .un.frag.mtu            = htons(mtu),
        };
-       icmph->checksum = ip_compute_csum(icmph, len);
+       icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0));
        skb_reset_transport_header(skb);
 
        niph = skb_push(skb, sizeof(*niph));
index 53bfd8a..d1e7d0c 100644 (file)
@@ -287,12 +287,12 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
        switch (skb->protocol) {
        case htons(ETH_P_IP):
-               xfrm_decode_session(skb, &fl, AF_INET);
                memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET);
                break;
        case htons(ETH_P_IPV6):
-               xfrm_decode_session(skb, &fl, AF_INET6);
                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET6);
                break;
        default:
                goto tx_err;
index f95142e..be5498f 100644 (file)
@@ -3221,13 +3221,9 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
                                     &rtm_dump_nexthop_cb, &filter);
        if (err < 0) {
                if (likely(skb->len))
-                       goto out;
-               goto out_err;
+                       err = skb->len;
        }
 
-out:
-       err = skb->len;
-out_err:
        cb->seq = net->nexthop.seq;
        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
        return err;
@@ -3367,25 +3363,19 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
                    dd->filter.res_bucket_nh_id != nhge->nh->id)
                        continue;
 
+               dd->ctx->bucket_index = bucket_index;
                err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
                                         RTM_NEWNEXTHOPBUCKET, portid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         cb->extack);
-               if (err < 0) {
-                       if (likely(skb->len))
-                               goto out;
-                       goto out_err;
-               }
+               if (err)
+                       return err;
        }
 
        dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
-       bucket_index = 0;
+       dd->ctx->bucket_index = 0;
 
-out:
-       err = skb->len;
-out_err:
-       dd->ctx->bucket_index = bucket_index;
-       return err;
+       return 0;
 }
 
 static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
@@ -3434,13 +3424,9 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
 
        if (err < 0) {
                if (likely(skb->len))
-                       goto out;
-               goto out_err;
+                       err = skb->len;
        }
 
-out:
-       err = skb->len;
-out_err:
        cb->seq = net->nexthop.seq;
        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
        return err;
index a59cc4b..2dbdc26 100644 (file)
@@ -312,7 +312,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                                             inet->inet_daddr));
        }
 
-       inet->inet_id = get_random_u16();
+       atomic_set(&inet->inet_id, get_random_u16());
 
        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
@@ -1596,7 +1596,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
-       newinet->inet_id = get_random_u16();
+       atomic_set(&newinet->inet_id, get_random_u16());
 
        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
index 470f581..206418b 100644 (file)
@@ -591,7 +591,9 @@ out_reset_timer:
            tcp_stream_is_thin(tp) &&
            icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
                icsk->icsk_backoff = 0;
-               icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+               icsk->icsk_rto = clamp(__tcp_set_rto(tp),
+                                      tcp_rto_min(sk),
+                                      TCP_RTO_MAX);
        } else if (sk->sk_state != TCP_SYN_SENT ||
                   icsk->icsk_backoff >
                   READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
index 658bfed..08d4b71 100644 (file)
@@ -152,7 +152,7 @@ config INET6_TUNNEL
        default n
 
 config IPV6_VTI
-tristate "Virtual (secure) IPv6: tunneling"
+       tristate "Virtual (secure) IPv6: tunneling"
        select IPV6_TUNNEL
        select NET_IP_TUNNEL
        select XFRM
index 10b2228..73c85d4 100644 (file)
@@ -568,12 +568,12 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
                    vti6_addr_conflict(t, ipv6_hdr(skb)))
                        goto tx_err;
 
-               xfrm_decode_session(skb, &fl, AF_INET6);
                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET6);
                break;
        case htons(ETH_P_IP):
-               xfrm_decode_session(skb, &fl, AF_INET);
                memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET);
                break;
        default:
                goto tx_err;
index 18634eb..a42be96 100644 (file)
@@ -197,7 +197,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
 static inline int ndisc_is_useropt(const struct net_device *dev,
                                   struct nd_opt_hdr *opt)
 {
-       return opt->nd_opt_type == ND_OPT_RDNSS ||
+       return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
+               opt->nd_opt_type == ND_OPT_RDNSS ||
                opt->nd_opt_type == ND_OPT_DNSSL ||
                opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
                opt->nd_opt_type == ND_OPT_PREF64 ||
index ede3c6a..b4ea4cf 100644 (file)
@@ -1848,9 +1848,9 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
        if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
                struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
 
-               if ((xfilter->sadb_x_filter_splen >=
+               if ((xfilter->sadb_x_filter_splen >
                        (sizeof(xfrm_address_t) << 3)) ||
-                   (xfilter->sadb_x_filter_dplen >=
+                   (xfilter->sadb_x_filter_dplen >
                        (sizeof(xfrm_address_t) << 3))) {
                        mutex_unlock(&pfk->dump_lock);
                        return -EINVAL;
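
The old check rejected a prefix length exactly equal to the address width (32 bits for IPv4, 128 for IPv6), which is a valid host prefix; only lengths strictly greater than the width are malformed, hence '>' instead of '>='. A small standalone version of the bound, with a made-up type of the same size as xfrm_address_t:

    #include <stdbool.h>
    #include <stdio.h>

    /* stand-in for xfrm_address_t: 16 bytes, enough for IPv6 */
    typedef struct { unsigned char b[16]; } addr_t;

    static bool splen_valid(unsigned int splen)
    {
        /* sizeof(addr_t) << 3 == 128; a /128 prefix must be accepted,
         * so only lengths above the width are rejected */
        return splen <= (sizeof(addr_t) << 3);
    }

    int main(void)
    {
        printf("/128 valid: %d\n", splen_valid(128));  /* 1 after the fix */
        printf("/129 valid: %d\n", splen_valid(129));  /* 0 */
        return 0;
    }
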
index 4f707d2..0af2599 100644 (file)
@@ -1083,7 +1083,8 @@ static inline bool ieee80211_rx_reorder_ready(struct tid_ampdu_rx *tid_agg_rx,
        struct sk_buff *tail = skb_peek_tail(frames);
        struct ieee80211_rx_status *status;
 
-       if (tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
+       if (tid_agg_rx->reorder_buf_filtered &&
+           tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
                return true;
 
        if (!tail)
@@ -1124,7 +1125,8 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
        }
 
 no_frame:
-       tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
+       if (tid_agg_rx->reorder_buf_filtered)
+               tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
        tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
 }
 
@@ -4264,6 +4266,7 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
                                          u16 ssn, u64 filtered,
                                          u16 received_mpdus)
 {
+       struct ieee80211_local *local;
        struct sta_info *sta;
        struct tid_ampdu_rx *tid_agg_rx;
        struct sk_buff_head frames;
@@ -4281,6 +4284,11 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
 
        sta = container_of(pubsta, struct sta_info, sta);
 
+       local = sta->sdata->local;
+       WARN_ONCE(local->hw.max_rx_aggregation_subframes > 64,
+                 "RX BA marker can't support max_rx_aggregation_subframes %u > 64\n",
+                 local->hw.max_rx_aggregation_subframes);
+
        if (!ieee80211_rx_data_set_sta(&rx, sta, -1))
                return;
 
index 3317d1c..d806585 100644 (file)
@@ -2335,7 +2335,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 
        lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
 
-       if (flags & MPTCP_CF_FASTCLOSE) {
+       if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
                /* be sure to force the tcp_disconnect() path,
                 * to generate the egress reset
                 */
@@ -3328,7 +3328,7 @@ static void mptcp_release_cb(struct sock *sk)
 
        if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
                __mptcp_clean_una_wakeup(sk);
-       if (unlikely(&msk->cb_flags)) {
+       if (unlikely(msk->cb_flags)) {
                /* be sure to set the current sk state before tacking actions
                 * depending on sk_state, that is processing MPTCP_ERROR_REPORT
                 */
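
The one-character fix above matters: &msk->cb_flags is the address of the field, which is never zero, so the old test was always true and the branch ran on every release_cb invocation. Testing the value restores the intended "only when some flag is pending" behaviour. A compact reproduction of this bug class:

    #include <stdio.h>

    struct msk { unsigned long cb_flags; };

    int main(void)
    {
        struct msk m = { .cb_flags = 0 };

        /* buggy: the address of a member is always non-zero
         * (most compilers warn here, which is how this gets caught) */
        if (&m.cb_flags)
            printf("always taken, even with no flags set\n");

        /* fixed: test the value, skipped when no flag is pending */
        if (m.cb_flags)
            printf("never printed while cb_flags is 0\n");
        return 0;
    }
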
index 37fbe22..ba2a873 100644 (file)
@@ -325,7 +325,6 @@ struct mptcp_sock {
        u32             subflow_id;
        u32             setsockopt_seq;
        char            ca_name[TCP_CA_NAME_MAX];
-       struct mptcp_sock       *dl_next;
 };
 
 #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
index 9ee3b7a..94ae7dd 100644 (file)
@@ -1793,16 +1793,31 @@ static void subflow_state_change(struct sock *sk)
 void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
 {
        struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
-       struct mptcp_sock *msk, *next, *head = NULL;
-       struct request_sock *req;
-       struct sock *sk;
+       struct request_sock *req, *head, *tail;
+       struct mptcp_subflow_context *subflow;
+       struct sock *sk, *ssk;
 
-       /* build a list of all unaccepted mptcp sockets */
+       /* Due to lock dependencies no relevant lock can be acquired under rskq_lock.
+        * Splice the req list, so that accept() can not reach the pending ssk after
+        * the listener socket is released below.
+        */
        spin_lock_bh(&queue->rskq_lock);
-       for (req = queue->rskq_accept_head; req; req = req->dl_next) {
-               struct mptcp_subflow_context *subflow;
-               struct sock *ssk = req->sk;
+       head = queue->rskq_accept_head;
+       tail = queue->rskq_accept_tail;
+       queue->rskq_accept_head = NULL;
+       queue->rskq_accept_tail = NULL;
+       spin_unlock_bh(&queue->rskq_lock);
+
+       if (!head)
+               return;
 
+       /* can't acquire the msk socket lock under the subflow one,
+        * or will cause ABBA deadlock
+        */
+       release_sock(listener_ssk);
+
+       for (req = head; req; req = req->dl_next) {
+               ssk = req->sk;
                if (!sk_is_mptcp(ssk))
                        continue;
 
@@ -1810,32 +1825,10 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
                if (!subflow || !subflow->conn)
                        continue;
 
-               /* skip if already in list */
                sk = subflow->conn;
-               msk = mptcp_sk(sk);
-               if (msk->dl_next || msk == head)
-                       continue;
-
                sock_hold(sk);
-               msk->dl_next = head;
-               head = msk;
-       }
-       spin_unlock_bh(&queue->rskq_lock);
-       if (!head)
-               return;
-
-       /* can't acquire the msk socket lock under the subflow one,
-        * or will cause ABBA deadlock
-        */
-       release_sock(listener_ssk);
-
-       for (msk = head; msk; msk = next) {
-               sk = (struct sock *)msk;
 
                lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
-               next = msk->dl_next;
-               msk->dl_next = NULL;
-
                __mptcp_unaccepted_force_close(sk);
                release_sock(sk);
 
@@ -1859,6 +1852,13 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
 
        /* we are still under the listener msk socket lock */
        lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+
+       /* restore the listener queue, to let the TCP code clean it up */
+       spin_lock_bh(&queue->rskq_lock);
+       WARN_ON_ONCE(queue->rskq_accept_head);
+       queue->rskq_accept_head = head;
+       queue->rskq_accept_tail = tail;
+       spin_unlock_bh(&queue->rskq_lock);
 }
 
 static int subflow_ulp_init(struct sock *sk)
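
The rework avoids the ABBA deadlock by splicing the whole accept queue out under rskq_lock, walking the detached list with no spinlock held (so each msk lock can be taken safely), and splicing the remainder back so the normal TCP teardown still finds it. The shape of that detach, process unlocked, restore pattern in plain C, using a pthread mutex and an invented singly linked request list:

    #include <pthread.h>
    #include <stdio.h>

    struct req { int id; struct req *dl_next; };

    static pthread_mutex_t rskq_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct req *accept_head, *accept_tail;

    static void queue_clean(void)
    {
        struct req *head, *tail, *r;

        /* 1. detach the list under the lock */
        pthread_mutex_lock(&rskq_lock);
        head = accept_head;
        tail = accept_tail;
        accept_head = accept_tail = NULL;
        pthread_mutex_unlock(&rskq_lock);

        if (!head)
            return;

        /* 2. walk it with the lock dropped; other locks may be taken here */
        for (r = head; r; r = r->dl_next)
            printf("cleaning request %d\n", r->id);

        /* 3. put the list back so the regular teardown path frees it */
        pthread_mutex_lock(&rskq_lock);
        accept_head = head;
        accept_tail = tail;
        pthread_mutex_unlock(&rskq_lock);
    }

    int main(void)
    {
        struct req a = { 1, NULL }, b = { 2, NULL };

        a.dl_next = &b;
        accept_head = &a;
        accept_tail = &b;
        queue_clean();
        return 0;
    }
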
index 62606fb..4bb0d90 100644 (file)
@@ -1876,6 +1876,7 @@ static int
 proc_do_sync_threshold(struct ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
+       struct netns_ipvs *ipvs = table->extra2;
        int *valp = table->data;
        int val[2];
        int rc;
@@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
                .mode = table->mode,
        };
 
+       mutex_lock(&ipvs->sync_mutex);
        memcpy(val, valp, sizeof(val));
        rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
        if (write) {
@@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
                else
                        memcpy(valp, val, sizeof(val));
        }
+       mutex_unlock(&ipvs->sync_mutex);
        return rc;
 }
 
@@ -4321,6 +4324,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
        ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
        ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
        tbl[idx].data = &ipvs->sysctl_sync_threshold;
+       tbl[idx].extra2 = ipvs;
        tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
        ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
        tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
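
proc_do_sync_threshold reads the two-int array, lets proc_dointvec rewrite it, and copies the result back; holding sync_mutex around that read-modify-write means anyone else serialized on the same mutex never observes a half-updated threshold/period pair, and extra2 is how the per-netns ipvs pointer reaches the handler. A minimal userspace analogue of guarding the paired update:

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    static pthread_mutex_t sync_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int sync_threshold[2] = { 3, 50 };   /* threshold, period */

    /* update both values under the mutex so readers that also take it
     * never see the new threshold combined with the old period */
    static void set_sync_threshold(int threshold, int period)
    {
        int val[2] = { threshold, period };

        pthread_mutex_lock(&sync_mutex);
        if (val[0] >= 0 && val[1] > val[0])     /* illustrative sanity check */
            memcpy(sync_threshold, val, sizeof(val));
        pthread_mutex_unlock(&sync_mutex);
    }

    int main(void)
    {
        set_sync_threshold(5, 100);
        printf("%d %d\n", sync_threshold[0], sync_threshold[1]);
        return 0;
    }
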
index 91eacc9..b6bcc8f 100644 (file)
@@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
        [SCTP_CONNTRACK_COOKIE_WAIT]            = 3 SECS,
        [SCTP_CONNTRACK_COOKIE_ECHOED]          = 3 SECS,
        [SCTP_CONNTRACK_ESTABLISHED]            = 210 SECS,
-       [SCTP_CONNTRACK_SHUTDOWN_SENT]          = 300 SECS / 1000,
-       [SCTP_CONNTRACK_SHUTDOWN_RECD]          = 300 SECS / 1000,
+       [SCTP_CONNTRACK_SHUTDOWN_SENT]          = 3 SECS,
+       [SCTP_CONNTRACK_SHUTDOWN_RECD]          = 3 SECS,
        [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]      = 3 SECS,
        [SCTP_CONNTRACK_HEARTBEAT_SENT]         = 30 SECS,
 };
@@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
        {
 /*     ORIGINAL        */
 /*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
-/* init         */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
+/* init         */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
 /* init_ack     */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
 /* abort        */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
 /* shutdown     */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
index d3c6ecd..eb8b116 100644 (file)
@@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions);
 static LIST_HEAD(nf_tables_objects);
 static LIST_HEAD(nf_tables_flowtables);
 static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
 static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
 
 enum {
        NFT_VALIDATE_SKIP       = 0,
@@ -120,6 +122,9 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
 static void nf_tables_trans_destroy_work(struct work_struct *w);
 static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
 
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
 static void nft_ctx_init(struct nft_ctx *ctx,
                         struct net *net,
                         const struct sk_buff *skb,
@@ -582,10 +587,6 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
        return __nft_trans_set_add(ctx, msg_type, set, NULL);
 }
 
-static void nft_setelem_data_deactivate(const struct net *net,
-                                       const struct nft_set *set,
-                                       struct nft_set_elem *elem);
-
 static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
                                  struct nft_set *set,
                                  const struct nft_set_iter *iter,
@@ -1372,7 +1373,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
        if (table == NULL)
                goto err_kzalloc;
 
-       table->validate_state = NFT_VALIDATE_SKIP;
+       table->validate_state = nft_net->validate_state;
        table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
        if (table->name == NULL)
                goto err_strdup;
@@ -5055,6 +5056,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 
        INIT_LIST_HEAD(&set->bindings);
        INIT_LIST_HEAD(&set->catchall_list);
+       refcount_set(&set->refs, 1);
        set->table = table;
        write_pnet(&set->net, net);
        set->ops = ops;
@@ -5122,6 +5124,14 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
        }
 }
 
+static void nft_set_put(struct nft_set *set)
+{
+       if (refcount_dec_and_test(&set->refs)) {
+               kfree(set->name);
+               kvfree(set);
+       }
+}
+
 static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 {
        int i;
@@ -5134,8 +5144,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 
        set->ops->destroy(ctx, set);
        nft_set_catchall_destroy(ctx, set);
-       kfree(set->name);
-       kvfree(set);
+       nft_set_put(set);
 }
 
 static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
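
Sets become reference-counted here because an asynchronous GC batch may hold the last reference after the control plane has already destroyed the set; nft_set_put() only frees the name and the set body once the count drops to zero. The usual refcount shape, as a standalone C11 sketch with invented names:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct set {
        atomic_int refs;
        char *name;
    };

    static struct set *set_new(const char *name)
    {
        struct set *s = calloc(1, sizeof(*s));

        if (!s)
            return NULL;
        atomic_init(&s->refs, 1);           /* like refcount_set(&set->refs, 1) */
        s->name = strdup(name);
        return s;
    }

    static void set_get(struct set *s)
    {
        atomic_fetch_add(&s->refs, 1);      /* e.g. a GC batch taking its reference */
    }

    static void set_put(struct set *s)
    {
        /* free only when the last holder drops its reference */
        if (atomic_fetch_sub(&s->refs, 1) == 1) {
            free(s->name);
            free(s);
        }
    }

    int main(void)
    {
        struct set *s = set_new("example");

        set_get(s);     /* async batch */
        set_put(s);     /* control-plane destroy */
        set_put(s);     /* batch completion actually frees */
        return 0;
    }
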
@@ -5602,8 +5611,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
                                  const struct nft_set_iter *iter,
                                  struct nft_set_elem *elem)
 {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
        struct nft_set_dump_args *args;
 
+       if (nft_set_elem_expired(ext))
+               return 0;
+
        args = container_of(iter, struct nft_set_dump_args, iter);
        return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
 }
@@ -6274,7 +6287,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
        list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
                if (nft_set_elem_active(ext, genmask) &&
-                   !nft_set_elem_expired(ext))
+                   !nft_set_elem_expired(ext) &&
+                   !nft_set_elem_is_dead(ext))
                        return ext;
        }
 
@@ -6282,29 +6296,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
 }
 EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
 
-void *nft_set_catchall_gc(const struct nft_set *set)
-{
-       struct nft_set_elem_catchall *catchall, *next;
-       struct nft_set_ext *ext;
-       void *elem = NULL;
-
-       list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
-               ext = nft_set_elem_ext(set, catchall->elem);
-
-               if (!nft_set_elem_expired(ext) ||
-                   nft_set_elem_mark_busy(ext))
-                       continue;
-
-               elem = catchall->elem;
-               list_del_rcu(&catchall->list);
-               kfree_rcu(catchall, rcu);
-               break;
-       }
-
-       return elem;
-}
-EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
-
 static int nft_setelem_catchall_insert(const struct net *net,
                                       struct nft_set *set,
                                       const struct nft_set_elem *elem,
@@ -6366,7 +6357,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
 
        if (nft_setelem_is_catchall(set, elem)) {
                nft_set_elem_change_active(net, set, ext);
-               nft_set_elem_clear_busy(ext);
        } else {
                set->ops->activate(net, set, elem);
        }
@@ -6381,8 +6371,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
 
        list_for_each_entry(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
-               if (!nft_is_active(net, ext) ||
-                   nft_set_elem_mark_busy(ext))
+               if (!nft_is_active(net, ext))
                        continue;
 
                kfree(elem->priv);
@@ -6777,7 +6766,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                goto err_elem_free;
        }
 
-       ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
+       ext->genmask = nft_genmask_cur(ctx->net);
 
        err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
        if (err) {
@@ -6929,9 +6918,9 @@ static void nft_setelem_data_activate(const struct net *net,
                nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
 }
 
-static void nft_setelem_data_deactivate(const struct net *net,
-                                       const struct nft_set *set,
-                                       struct nft_set_elem *elem)
+void nft_setelem_data_deactivate(const struct net *net,
+                                const struct nft_set *set,
+                                struct nft_set_elem *elem)
 {
        const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
 
@@ -7095,14 +7084,14 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
 
        list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
-               if (!nft_set_elem_active(ext, genmask) ||
-                   nft_set_elem_mark_busy(ext))
+               if (!nft_set_elem_active(ext, genmask))
                        continue;
 
                elem.priv = catchall->elem;
                ret = __nft_set_catchall_flush(ctx, set, &elem);
                if (ret < 0)
                        break;
+               nft_set_elem_change_active(ctx->net, set, ext);
        }
 
        return ret;
@@ -7170,29 +7159,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
        return err;
 }
 
-void nft_set_gc_batch_release(struct rcu_head *rcu)
-{
-       struct nft_set_gc_batch *gcb;
-       unsigned int i;
-
-       gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
-       for (i = 0; i < gcb->head.cnt; i++)
-               nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
-       kfree(gcb);
-}
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
-                                               gfp_t gfp)
-{
-       struct nft_set_gc_batch *gcb;
-
-       gcb = kzalloc(sizeof(*gcb), gfp);
-       if (gcb == NULL)
-               return gcb;
-       gcb->head.set = set;
-       return gcb;
-}
-
 /*
  * Stateful objects
  */
@@ -9085,9 +9051,8 @@ static int nf_tables_validate(struct net *net)
                                return -EAGAIN;
 
                        nft_validate_state_update(table, NFT_VALIDATE_SKIP);
+                       break;
                }
-
-               break;
        }
 
        return 0;
@@ -9414,6 +9379,212 @@ void nft_chain_del(struct nft_chain *chain)
        list_del_rcu(&chain->list);
 }
 
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+                                       struct nft_trans_gc *trans)
+{
+       void **priv = trans->priv;
+       unsigned int i;
+
+       for (i = 0; i < trans->count; i++) {
+               struct nft_set_elem elem = {
+                       .priv = priv[i],
+               };
+
+               nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
+               nft_setelem_remove(ctx->net, trans->set, &elem);
+       }
+}
+
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+       nft_set_put(trans->set);
+       put_net(trans->net);
+       kfree(trans);
+}
+
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+       struct nft_set_elem elem = {};
+       struct nft_trans_gc *trans;
+       struct nft_ctx ctx = {};
+       unsigned int i;
+
+       trans = container_of(rcu, struct nft_trans_gc, rcu);
+       ctx.net = read_pnet(&trans->set->net);
+
+       for (i = 0; i < trans->count; i++) {
+               elem.priv = trans->priv[i];
+               if (!nft_setelem_is_catchall(trans->set, &elem))
+                       atomic_dec(&trans->set->nelems);
+
+               nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+       }
+
+       nft_trans_gc_destroy(trans);
+}
+
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+       struct nftables_pernet *nft_net;
+       struct nft_ctx ctx = {};
+
+       nft_net = nft_pernet(trans->net);
+
+       mutex_lock(&nft_net->commit_mutex);
+
+       /* Check for race with transaction, otherwise this batch refers to
+        * stale objects that might not be there anymore. Skip transaction if
+        * set has been destroyed from control plane transaction in case gc
+        * worker loses race.
+        */
+       if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
+               mutex_unlock(&nft_net->commit_mutex);
+               return false;
+       }
+
+       ctx.net = trans->net;
+       ctx.table = trans->set->table;
+
+       nft_trans_gc_setelem_remove(&ctx, trans);
+       mutex_unlock(&nft_net->commit_mutex);
+
+       return true;
+}
+
+static void nft_trans_gc_work(struct work_struct *work)
+{
+       struct nft_trans_gc *trans, *next;
+       LIST_HEAD(trans_gc_list);
+
+       spin_lock(&nf_tables_gc_list_lock);
+       list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+       spin_unlock(&nf_tables_gc_list_lock);
+
+       list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+               list_del(&trans->list);
+               if (!nft_trans_gc_work_done(trans)) {
+                       nft_trans_gc_destroy(trans);
+                       continue;
+               }
+               call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+       }
+}
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+                                       unsigned int gc_seq, gfp_t gfp)
+{
+       struct net *net = read_pnet(&set->net);
+       struct nft_trans_gc *trans;
+
+       trans = kzalloc(sizeof(*trans), gfp);
+       if (!trans)
+               return NULL;
+
+       trans->net = maybe_get_net(net);
+       if (!trans->net) {
+               kfree(trans);
+               return NULL;
+       }
+
+       refcount_inc(&set->refs);
+       trans->set = set;
+       trans->seq = gc_seq;
+
+       return trans;
+}
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+       trans->priv[trans->count++] = priv;
+}
+
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+       spin_lock(&nf_tables_gc_list_lock);
+       list_add_tail(&trans->list, &nf_tables_gc_list);
+       spin_unlock(&nf_tables_gc_list_lock);
+
+       schedule_work(&trans_gc_work);
+}
+
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+       return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+                                             unsigned int gc_seq, gfp_t gfp)
+{
+       if (nft_trans_gc_space(gc))
+               return gc;
+
+       nft_trans_gc_queue_work(gc);
+
+       return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+}
+
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+       if (trans->count == 0) {
+               nft_trans_gc_destroy(trans);
+               return;
+       }
+
+       nft_trans_gc_queue_work(trans);
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+       if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+               return NULL;
+
+       if (nft_trans_gc_space(gc))
+               return gc;
+
+       call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+       return nft_trans_gc_alloc(gc->set, 0, gfp);
+}
+
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+       WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+       if (trans->count == 0) {
+               nft_trans_gc_destroy(trans);
+               return;
+       }
+
+       call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+                                          unsigned int gc_seq)
+{
+       struct nft_set_elem_catchall *catchall;
+       const struct nft_set *set = gc->set;
+       struct nft_set_ext *ext;
+
+       list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+               ext = nft_set_elem_ext(set, catchall->elem);
+
+               if (!nft_set_elem_expired(ext))
+                       continue;
+               if (nft_set_elem_is_dead(ext))
+                       goto dead_elem;
+
+               nft_set_elem_dead(ext);
+dead_elem:
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       return NULL;
+
+               nft_trans_gc_elem_add(gc, catchall->elem);
+       }
+
+       return gc;
+}
+
 static void nf_tables_module_autoload_cleanup(struct net *net)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
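
The nft_trans_gc_* helpers added above batch expired elements: a batch is allocated against the current gc sequence, elements are appended until NFT_TRANS_GC_BATCHCOUNT is exhausted, a full batch is handed to the worker and a fresh one allocated, and a final *_done call flushes or frees whatever is left. A stripped-down sketch of that fill/flush batching discipline, with names and sizes invented:

    #include <stdio.h>
    #include <stdlib.h>

    #define BATCHCOUNT 4            /* stand-in for NFT_TRANS_GC_BATCHCOUNT */

    struct gc_batch {
        unsigned int count;
        void *priv[BATCHCOUNT];
    };

    static struct gc_batch *gc_alloc(void)
    {
        return calloc(1, sizeof(struct gc_batch));
    }

    static void gc_queue_work(struct gc_batch *gc)
    {
        printf("queued batch of %u elements\n", gc->count);
        free(gc);                   /* a worker would consume it asynchronously */
    }

    /* like nft_trans_gc_queue_async(): hand off a full batch, start a new one */
    static struct gc_batch *gc_queue(struct gc_batch *gc)
    {
        if (gc->count < BATCHCOUNT)
            return gc;
        gc_queue_work(gc);
        return gc_alloc();
    }

    /* like nft_trans_gc_queue_async_done(): flush or drop the remainder */
    static void gc_done(struct gc_batch *gc)
    {
        if (!gc->count)
            free(gc);
        else
            gc_queue_work(gc);
    }

    int main(void)
    {
        struct gc_batch *gc = gc_alloc();

        for (int i = 0; i < 10 && gc; i++) {
            gc = gc_queue(gc);      /* may swap in a fresh batch */
            if (gc)
                gc->priv[gc->count++] = (void *)(long)i;
        }
        if (gc)
            gc_done(gc);
        return 0;
    }
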
@@ -9572,15 +9743,31 @@ static void nft_set_commit_update(struct list_head *set_update_list)
        }
 }
 
+static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
+{
+       unsigned int gc_seq;
+
+       /* Bump gc counter, it becomes odd, this is the busy mark. */
+       gc_seq = READ_ONCE(nft_net->gc_seq);
+       WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
+       return gc_seq;
+}
+
+static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
+{
+       WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+}
+
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
        struct nft_trans *trans, *next;
+       unsigned int base_seq, gc_seq;
        LIST_HEAD(set_update_list);
        struct nft_trans_elem *te;
        struct nft_chain *chain;
        struct nft_table *table;
-       unsigned int base_seq;
        LIST_HEAD(adl);
        int err;
 
@@ -9611,8 +9798,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
        }
 
        /* 0. Validate ruleset, otherwise roll back for error reporting. */
-       if (nf_tables_validate(net) < 0)
+       if (nf_tables_validate(net) < 0) {
+               nft_net->validate_state = NFT_VALIDATE_DO;
                return -EAGAIN;
+       }
 
        err = nft_flow_rule_offload_commit(net);
        if (err < 0)
@@ -9657,6 +9846,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
        WRITE_ONCE(nft_net->base_seq, base_seq);
 
+       gc_seq = nft_gc_seq_begin(nft_net);
+
        /* step 3. Start new generation, rules_gen_X now in use. */
        net->nft.gencursor = nft_gencursor_next(net);
 
@@ -9764,6 +9955,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                        break;
                case NFT_MSG_DELSET:
                case NFT_MSG_DESTROYSET:
+                       nft_trans_set(trans)->dead = 1;
                        list_del_rcu(&nft_trans_set(trans)->list);
                        nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
                                             trans->msg_type, GFP_KERNEL);
@@ -9866,6 +10058,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
        nft_commit_notify(net, NETLINK_CB(skb).portid);
        nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
        nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+       nft_gc_seq_end(nft_net, gc_seq);
+       nft_net->validate_state = NFT_VALIDATE_SKIP;
        nf_tables_commit_release(net);
 
        return 0;
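
nft_gc_seq_begin/end implement a generation counter: commit (and later in the series abort, netns exit and the netlink release notifier) bump it to an odd value while the transaction runs and back to even when it finishes, with READ_ONCE/WRITE_ONCE because GC workers sample it locklessly. A worker snapshots the counter before walking a set and discards its batch if the value has moved, so it never frees elements a concurrent transaction may still reference. The counter logic in isolation, as a single-threaded sketch:

    #include <stdio.h>

    static unsigned int gc_seq;

    /* odd value = a transaction (commit/abort) is in progress */
    static void gc_seq_begin(void) { ++gc_seq; }
    static void gc_seq_end(void)   { ++gc_seq; }

    /* GC worker side: only act if nothing changed since the snapshot */
    static int gc_still_valid(unsigned int snapshot)
    {
        return gc_seq == snapshot;
    }

    int main(void)
    {
        unsigned int snap = gc_seq;        /* worker samples the counter */

        printf("no transaction yet:  %d\n", gc_still_valid(snap));   /* 1 */

        gc_seq_begin();                    /* a commit starts ... */
        gc_seq_end();                      /* ... and finishes */
        printf("after a transaction: %d\n", gc_still_valid(snap));   /* 0: retry later */
        return 0;
    }
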
@@ -10142,8 +10337,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
                           enum nfnl_abort_action action)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
-       int ret = __nf_tables_abort(net, action);
+       unsigned int gc_seq;
+       int ret;
 
+       gc_seq = nft_gc_seq_begin(nft_net);
+       ret = __nf_tables_abort(net, action);
+       nft_gc_seq_end(nft_net, gc_seq);
        mutex_unlock(&nft_net->commit_mutex);
 
        return ret;
@@ -10866,6 +11065,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
        struct net *net = n->net;
        unsigned int deleted;
        bool restart = false;
+       unsigned int gc_seq;
 
        if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
                return NOTIFY_DONE;
@@ -10873,8 +11073,11 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
        nft_net = nft_pernet(net);
        deleted = 0;
        mutex_lock(&nft_net->commit_mutex);
+
+       gc_seq = nft_gc_seq_begin(nft_net);
+
        if (!list_empty(&nf_tables_destroy_list))
-               rcu_barrier();
+               nf_tables_trans_destroy_flush_work();
 again:
        list_for_each_entry(table, &nft_net->tables, list) {
                if (nft_table_has_owner(table) &&
@@ -10895,6 +11098,8 @@ again:
                if (restart)
                        goto again;
        }
+       nft_gc_seq_end(nft_net, gc_seq);
+
        mutex_unlock(&nft_net->commit_mutex);
 
        return NOTIFY_DONE;
@@ -10915,6 +11120,8 @@ static int __net_init nf_tables_init_net(struct net *net)
        INIT_LIST_HEAD(&nft_net->notify_list);
        mutex_init(&nft_net->commit_mutex);
        nft_net->base_seq = 1;
+       nft_net->gc_seq = 0;
+       nft_net->validate_state = NFT_VALIDATE_SKIP;
 
        return 0;
 }
@@ -10931,22 +11138,36 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
 static void __net_exit nf_tables_exit_net(struct net *net)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
+       unsigned int gc_seq;
 
        mutex_lock(&nft_net->commit_mutex);
+
+       gc_seq = nft_gc_seq_begin(nft_net);
+
        if (!list_empty(&nft_net->commit_list) ||
            !list_empty(&nft_net->module_list))
                __nf_tables_abort(net, NFNL_ABORT_NONE);
+
        __nft_release_tables(net);
+
+       nft_gc_seq_end(nft_net, gc_seq);
+
        mutex_unlock(&nft_net->commit_mutex);
        WARN_ON_ONCE(!list_empty(&nft_net->tables));
        WARN_ON_ONCE(!list_empty(&nft_net->module_list));
        WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
 }
 
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+       flush_work(&trans_gc_work);
+}
+
 static struct pernet_operations nf_tables_net_ops = {
        .init           = nf_tables_init_net,
        .pre_exit       = nf_tables_pre_exit_net,
        .exit           = nf_tables_exit_net,
+       .exit_batch     = nf_tables_exit_batch,
        .id             = &nf_tables_net_id,
        .size           = sizeof(struct nftables_pernet),
 };
@@ -11018,6 +11239,7 @@ static void __exit nf_tables_module_exit(void)
        nft_chain_filter_fini();
        nft_chain_route_fini();
        unregister_pernet_subsys(&nf_tables_net_ops);
+       cancel_work_sync(&trans_gc_work);
        cancel_work_sync(&trans_destroy_work);
        rcu_barrier();
        rhltable_destroy(&nft_objname_ht);
index 4fb34d7..5c5cc01 100644 (file)
@@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
        if (IS_ERR(set))
                return PTR_ERR(set);
 
+       if (set->flags & NFT_SET_OBJECT)
+               return -EOPNOTSUPP;
+
        if (set->ops->update == NULL)
                return -EOPNOTSUPP;
 
index 0b73cb0..5247636 100644 (file)
@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
 
        if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
                return 1;
+       if (nft_set_elem_is_dead(&he->ext))
+               return 1;
        if (nft_set_elem_expired(&he->ext))
                return 1;
        if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -188,7 +190,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
        struct nft_rhash_elem *he = elem->priv;
 
        nft_set_elem_change_active(net, set, &he->ext);
-       nft_set_elem_clear_busy(&he->ext);
 }
 
 static bool nft_rhash_flush(const struct net *net,
@@ -196,12 +197,9 @@ static bool nft_rhash_flush(const struct net *net,
 {
        struct nft_rhash_elem *he = priv;
 
-       if (!nft_set_elem_mark_busy(&he->ext) ||
-           !nft_is_active(net, &he->ext)) {
-               nft_set_elem_change_active(net, set, &he->ext);
-               return true;
-       }
-       return false;
+       nft_set_elem_change_active(net, set, &he->ext);
+
+       return true;
 }
 
 static void *nft_rhash_deactivate(const struct net *net,
@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
 
        rcu_read_lock();
        he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
-       if (he != NULL &&
-           !nft_rhash_flush(net, set, he))
-               he = NULL;
+       if (he)
+               nft_set_elem_change_active(net, set, &he->ext);
 
        rcu_read_unlock();
 
@@ -252,7 +249,9 @@ static bool nft_rhash_delete(const struct nft_set *set,
        if (he == NULL)
                return false;
 
-       return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+       nft_set_elem_dead(&he->ext);
+
+       return true;
 }
 
 static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
@@ -278,8 +277,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (nft_set_elem_expired(&he->ext))
-                       goto cont;
                if (!nft_set_elem_active(&he->ext, iter->genmask))
                        goto cont;
 
@@ -314,25 +311,51 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
 
 static void nft_rhash_gc(struct work_struct *work)
 {
+       struct nftables_pernet *nft_net;
        struct nft_set *set;
        struct nft_rhash_elem *he;
        struct nft_rhash *priv;
-       struct nft_set_gc_batch *gcb = NULL;
        struct rhashtable_iter hti;
+       struct nft_trans_gc *gc;
+       struct net *net;
+       u32 gc_seq;
 
        priv = container_of(work, struct nft_rhash, gc_work.work);
        set  = nft_set_container_of(priv);
+       net  = read_pnet(&set->net);
+       nft_net = nft_pernet(net);
+       gc_seq = READ_ONCE(nft_net->gc_seq);
+
+       if (nft_set_gc_is_pending(set))
+               goto done;
+
+       gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+       if (!gc)
+               goto done;
 
        rhashtable_walk_enter(&priv->ht, &hti);
        rhashtable_walk_start(&hti);
 
        while ((he = rhashtable_walk_next(&hti))) {
                if (IS_ERR(he)) {
-                       if (PTR_ERR(he) != -EAGAIN)
-                               break;
+                       if (PTR_ERR(he) != -EAGAIN) {
+                               nft_trans_gc_destroy(gc);
+                               gc = NULL;
+                               goto try_later;
+                       }
                        continue;
                }
 
+               /* Ruleset has been updated, try later. */
+               if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+                       nft_trans_gc_destroy(gc);
+                       gc = NULL;
+                       goto try_later;
+               }
+
+               if (nft_set_elem_is_dead(&he->ext))
+                       goto dead_elem;
+
                if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
                    nft_rhash_expr_needs_gc_run(set, &he->ext))
                        goto needs_gc_run;
@@ -340,26 +363,26 @@ static void nft_rhash_gc(struct work_struct *work)
                if (!nft_set_elem_expired(&he->ext))
                        continue;
 needs_gc_run:
-               if (nft_set_elem_mark_busy(&he->ext))
-                       continue;
+               nft_set_elem_dead(&he->ext);
+dead_elem:
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       goto try_later;
 
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb == NULL)
-                       break;
-               rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
-               atomic_dec(&set->nelems);
-               nft_set_gc_batch_add(gcb, he);
+               nft_trans_gc_elem_add(gc, he);
        }
+
+       gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
+       /* catchall list iteration requires rcu read side lock. */
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);
 
-       he = nft_set_catchall_gc(set);
-       if (he) {
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb)
-                       nft_set_gc_batch_add(gcb, he);
-       }
-       nft_set_gc_batch_complete(gcb);
+       if (gc)
+               nft_trans_gc_queue_async_done(gc);
+
+done:
        queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
                           nft_set_gc_interval(set));
 }
@@ -394,7 +417,7 @@ static int nft_rhash_init(const struct nft_set *set,
                return err;
 
        INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
-       if (set->flags & NFT_SET_TIMEOUT)
+       if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
                nft_rhash_gc_init(set);
 
        return 0;
@@ -422,7 +445,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
        };
 
        cancel_delayed_work_sync(&priv->gc_work);
-       rcu_barrier();
        rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
                                    (void *)&rhash_ctx);
 }
index 49915a2..6af9c9e 100644 (file)
@@ -566,8 +566,9 @@ next_match:
                        goto out;
 
                if (last) {
-                       if (nft_set_elem_expired(&f->mt[b].e->ext) ||
-                           (genmask &&
+                       if (nft_set_elem_expired(&f->mt[b].e->ext))
+                               goto next_match;
+                       if ((genmask &&
                             !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
                                goto next_match;
 
@@ -602,7 +603,7 @@ static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
                            const struct nft_set_elem *elem, unsigned int flags)
 {
        return pipapo_get(net, set, (const u8 *)elem->key.val.data,
-                         nft_genmask_cur(net));
+                        nft_genmask_cur(net));
 }
 
 /**
@@ -901,12 +902,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
 static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
                         int mask_bits)
 {
-       int rule = f->rules++, group, ret, bit_offset = 0;
+       int rule = f->rules, group, ret, bit_offset = 0;
 
-       ret = pipapo_resize(f, f->rules - 1, f->rules);
+       ret = pipapo_resize(f, f->rules, f->rules + 1);
        if (ret)
                return ret;
 
+       f->rules++;
+
        for (group = 0; group < f->groups; group++) {
                int i, v;
                u8 mask;
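
The old code bumped f->rules before pipapo_resize() had a chance to fail, leaving the rule count out of step with the allocated buckets on error; the fix performs the fallible resize first and commits the counter only afterwards. The general shape of that ordering, in a small invented example:

    #include <stdio.h>
    #include <stdlib.h>

    struct field { int rules; int *map; };

    static int resize(struct field *f, int new_rules)
    {
        int *p = realloc(f->map, sizeof(*p) * (size_t)new_rules);

        if (!p)
            return -1;      /* out of memory: caller must see unchanged state */
        f->map = p;
        return 0;
    }

    static int insert_rule(struct field *f)
    {
        /* do the fallible resize first; only then bump the counter */
        if (resize(f, f->rules + 1) < 0)
            return -1;
        return f->rules++;  /* index of the freshly added rule */
    }

    int main(void)
    {
        struct field f = { 0, NULL };

        printf("inserted at %d\n", insert_rule(&f));
        free(f.map);
        return 0;
    }
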
@@ -1051,7 +1054,9 @@ static int pipapo_expand(struct nft_pipapo_field *f,
                        step++;
                        if (step >= len) {
                                if (!masks) {
-                                       pipapo_insert(f, base, 0);
+                                       err = pipapo_insert(f, base, 0);
+                                       if (err < 0)
+                                               return err;
                                        masks = 1;
                                }
                                goto out;
@@ -1234,6 +1239,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
                else
                        ret = pipapo_expand(f, start, end, f->groups * f->bb);
 
+               if (ret < 0)
+                       return ret;
+
                if (f->bsize > bsize_max)
                        bsize_max = f->bsize;
 
@@ -1528,16 +1536,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
        }
 }
 
+static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
+                                    struct nft_pipapo_elem *e)
+
+{
+       struct nft_set_elem elem = {
+               .priv   = e,
+       };
+
+       nft_setelem_data_deactivate(net, set, &elem);
+}
+
 /**
  * pipapo_gc() - Drop expired entries from set, destroy start and end elements
- * @set:       nftables API set representation
+ * @_set:      nftables API set representation
  * @m:         Matching data
  */
-static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
 {
+       struct nft_set *set = (struct nft_set *) _set;
        struct nft_pipapo *priv = nft_set_priv(set);
+       struct net *net = read_pnet(&set->net);
        int rules_f0, first_rule = 0;
        struct nft_pipapo_elem *e;
+       struct nft_trans_gc *gc;
+
+       gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+       if (!gc)
+               return;
 
        while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
                union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1561,13 +1587,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
                f--;
                i--;
                e = f->mt[rulemap[i].to].e;
-               if (nft_set_elem_expired(&e->ext) &&
-                   !nft_set_elem_mark_busy(&e->ext)) {
+
+               /* synchronous gc never fails, there is no need to set on
+                * NFT_SET_ELEM_DEAD_BIT.
+                */
+               if (nft_set_elem_expired(&e->ext)) {
                        priv->dirty = true;
-                       pipapo_drop(m, rulemap);
 
-                       rcu_barrier();
-                       nft_set_elem_destroy(set, e, true);
+                       gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+                       if (!gc)
+                               break;
+
+                       nft_pipapo_gc_deactivate(net, set, e);
+                       pipapo_drop(m, rulemap);
+                       nft_trans_gc_elem_add(gc, e);
 
                        /* And check again current first rule, which is now the
                         * first we haven't checked.
@@ -1577,11 +1610,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
                }
        }
 
-       e = nft_set_catchall_gc(set);
-       if (e)
-               nft_set_elem_destroy(set, e, true);
-
-       priv->last_gc = jiffies;
+       gc = nft_trans_gc_catchall(gc, 0);
+       if (gc) {
+               nft_trans_gc_queue_sync_done(gc);
+               priv->last_gc = jiffies;
+       }
 }
 
 /**
@@ -1664,6 +1697,17 @@ static void nft_pipapo_commit(const struct nft_set *set)
        priv->clone = new_clone;
 }
 
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+       const struct net *net = read_pnet(&set->net);
+
+       return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+       return true;
+#endif
+}
+
 static void nft_pipapo_abort(const struct nft_set *set)
 {
        struct nft_pipapo *priv = nft_set_priv(set);
@@ -1672,7 +1716,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
        if (!priv->dirty)
                return;
 
-       m = rcu_dereference(priv->match);
+       m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
 
        new_clone = pipapo_clone(m);
        if (IS_ERR(new_clone))
@@ -1699,14 +1743,9 @@ static void nft_pipapo_activate(const struct net *net,
                                const struct nft_set *set,
                                const struct nft_set_elem *elem)
 {
-       struct nft_pipapo_elem *e;
-
-       e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
-       if (IS_ERR(e))
-               return;
+       struct nft_pipapo_elem *e = elem->priv;
 
        nft_set_elem_change_active(net, set, &e->ext);
-       nft_set_elem_clear_busy(&e->ext);
 }
 
 /**
@@ -1918,10 +1957,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
 
        data = (const u8 *)nft_set_ext_key(&e->ext);
 
-       e = pipapo_get(net, set, data, 0);
-       if (IS_ERR(e))
-               return;
-
        while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
                union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
                const u8 *match_start, *match_end;
@@ -2005,8 +2040,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
                        goto cont;
 
                e = f->mt[r].e;
-               if (nft_set_elem_expired(&e->ext))
-                       goto cont;
 
                elem.priv = e;
 
index 8d73fff..c6435e7 100644 (file)
@@ -46,6 +46,12 @@ static int nft_rbtree_cmp(const struct nft_set *set,
                      set->klen);
 }
 
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+       return nft_set_elem_expired(&rbe->ext) ||
+              nft_set_elem_is_dead(&rbe->ext);
+}
+
 static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
                                const u32 *key, const struct nft_set_ext **ext,
                                unsigned int seq)
@@ -80,7 +86,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
                                continue;
                        }
 
-                       if (nft_set_elem_expired(&rbe->ext))
+                       if (nft_rbtree_elem_expired(rbe))
                                return false;
 
                        if (nft_rbtree_interval_end(rbe)) {
@@ -98,7 +104,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
 
        if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
            nft_set_elem_active(&interval->ext, genmask) &&
-           !nft_set_elem_expired(&interval->ext) &&
+           !nft_rbtree_elem_expired(interval) &&
            nft_rbtree_interval_start(interval)) {
                *ext = &interval->ext;
                return true;
@@ -215,6 +221,18 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
        return rbe;
 }
 
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+                                struct nft_rbtree *priv,
+                                struct nft_rbtree_elem *rbe)
+{
+       struct nft_set_elem elem = {
+               .priv   = rbe,
+       };
+
+       nft_setelem_data_deactivate(net, set, &elem);
+       rb_erase(&rbe->node, &priv->root);
+}
+
 static int nft_rbtree_gc_elem(const struct nft_set *__set,
                              struct nft_rbtree *priv,
                              struct nft_rbtree_elem *rbe,
@@ -222,11 +240,12 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
 {
        struct nft_set *set = (struct nft_set *)__set;
        struct rb_node *prev = rb_prev(&rbe->node);
+       struct net *net = read_pnet(&set->net);
        struct nft_rbtree_elem *rbe_prev;
-       struct nft_set_gc_batch *gcb;
+       struct nft_trans_gc *gc;
 
-       gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
-       if (!gcb)
+       gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+       if (!gc)
                return -ENOMEM;
 
        /* search for end interval coming before this element.
@@ -244,17 +263,28 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
 
        if (prev) {
                rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+               nft_rbtree_gc_remove(net, set, priv, rbe_prev);
 
-               rb_erase(&rbe_prev->node, &priv->root);
-               atomic_dec(&set->nelems);
-               nft_set_gc_batch_add(gcb, rbe_prev);
+               /* There is always room in this trans gc for this element,
+                * memory allocation never actually happens, hence, the warning
+                * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
+                * this is synchronous gc which never fails.
+                */
+               gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+               if (WARN_ON_ONCE(!gc))
+                       return -ENOMEM;
+
+               nft_trans_gc_elem_add(gc, rbe_prev);
        }
 
-       rb_erase(&rbe->node, &priv->root);
-       atomic_dec(&set->nelems);
+       nft_rbtree_gc_remove(net, set, priv, rbe);
+       gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+       if (WARN_ON_ONCE(!gc))
+               return -ENOMEM;
+
+       nft_trans_gc_elem_add(gc, rbe);
 
-       nft_set_gc_batch_add(gcb, rbe);
-       nft_set_gc_batch_complete(gcb);
+       nft_trans_gc_queue_sync_done(gc);
 
        return 0;
 }
@@ -482,7 +512,6 @@ static void nft_rbtree_activate(const struct net *net,
        struct nft_rbtree_elem *rbe = elem->priv;
 
        nft_set_elem_change_active(net, set, &rbe->ext);
-       nft_set_elem_clear_busy(&rbe->ext);
 }
 
 static bool nft_rbtree_flush(const struct net *net,
@@ -490,12 +519,9 @@ static bool nft_rbtree_flush(const struct net *net,
 {
        struct nft_rbtree_elem *rbe = priv;
 
-       if (!nft_set_elem_mark_busy(&rbe->ext) ||
-           !nft_is_active(net, &rbe->ext)) {
-               nft_set_elem_change_active(net, set, &rbe->ext);
-               return true;
-       }
-       return false;
+       nft_set_elem_change_active(net, set, &rbe->ext);
+
+       return true;
 }
 
 static void *nft_rbtree_deactivate(const struct net *net,
@@ -552,8 +578,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (nft_set_elem_expired(&rbe->ext))
-                       goto cont;
                if (!nft_set_elem_active(&rbe->ext, iter->genmask))
                        goto cont;
 
@@ -572,26 +596,43 @@ cont:
 
 static void nft_rbtree_gc(struct work_struct *work)
 {
-       struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
-       struct nft_set_gc_batch *gcb = NULL;
+       struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+       struct nftables_pernet *nft_net;
        struct nft_rbtree *priv;
+       struct nft_trans_gc *gc;
        struct rb_node *node;
        struct nft_set *set;
+       unsigned int gc_seq;
        struct net *net;
-       u8 genmask;
 
        priv = container_of(work, struct nft_rbtree, gc_work.work);
        set  = nft_set_container_of(priv);
        net  = read_pnet(&set->net);
-       genmask = nft_genmask_cur(net);
+       nft_net = nft_pernet(net);
+       gc_seq  = READ_ONCE(nft_net->gc_seq);
+
+       if (nft_set_gc_is_pending(set))
+               goto done;
+
+       gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+       if (!gc)
+               goto done;
 
        write_lock_bh(&priv->lock);
        write_seqcount_begin(&priv->count);
        for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+               /* Ruleset has been updated, try later. */
+               if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+                       nft_trans_gc_destroy(gc);
+                       gc = NULL;
+                       goto try_later;
+               }
+
                rbe = rb_entry(node, struct nft_rbtree_elem, node);
 
-               if (!nft_set_elem_active(&rbe->ext, genmask))
-                       continue;
+               if (nft_set_elem_is_dead(&rbe->ext))
+                       goto dead_elem;
 
                /* elements are reversed in the rbtree for historical reasons,
                 * from highest to lowest value, that is why end element is
@@ -604,46 +645,36 @@ static void nft_rbtree_gc(struct work_struct *work)
                if (!nft_set_elem_expired(&rbe->ext))
                        continue;
 
-               if (nft_set_elem_mark_busy(&rbe->ext)) {
-                       rbe_end = NULL;
+               nft_set_elem_dead(&rbe->ext);
+
+               if (!rbe_end)
                        continue;
-               }
 
-               if (rbe_prev) {
-                       rb_erase(&rbe_prev->node, &priv->root);
-                       rbe_prev = NULL;
-               }
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (!gcb)
-                       break;
+               nft_set_elem_dead(&rbe_end->ext);
 
-               atomic_dec(&set->nelems);
-               nft_set_gc_batch_add(gcb, rbe);
-               rbe_prev = rbe;
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       goto try_later;
 
-               if (rbe_end) {
-                       atomic_dec(&set->nelems);
-                       nft_set_gc_batch_add(gcb, rbe_end);
-                       rb_erase(&rbe_end->node, &priv->root);
-                       rbe_end = NULL;
-               }
-               node = rb_next(node);
-               if (!node)
-                       break;
+               nft_trans_gc_elem_add(gc, rbe_end);
+               rbe_end = NULL;
+dead_elem:
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       goto try_later;
+
+               nft_trans_gc_elem_add(gc, rbe);
        }
-       if (rbe_prev)
-               rb_erase(&rbe_prev->node, &priv->root);
+
+       gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
        write_seqcount_end(&priv->count);
        write_unlock_bh(&priv->lock);
 
-       rbe = nft_set_catchall_gc(set);
-       if (rbe) {
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb)
-                       nft_set_gc_batch_add(gcb, rbe);
-       }
-       nft_set_gc_batch_complete(gcb);
-
+       if (gc)
+               nft_trans_gc_queue_async_done(gc);
+done:
        queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
                           nft_set_gc_interval(set));
 }
index a6d2a0b..3d7a91e 100644 (file)
@@ -1829,7 +1829,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        parms.port_no = OVSP_LOCAL;
        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
        parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
-               ? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0;
+               ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;
 
        /* So far only local changes have been made, now need the lock. */
        ovs_lock();
@@ -2049,7 +2049,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
        [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
                PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
-       [OVS_DP_ATTR_IFINDEX] = {.type = NLA_U32 },
+       [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
 };
 
 static const struct genl_small_ops dp_datapath_genl_ops[] = {
@@ -2302,7 +2302,7 @@ restart:
        parms.port_no = port_no;
        parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
        parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
-               ? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
+               ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
 
        vport = new_vport(&parms);
        err = PTR_ERR(vport);
@@ -2539,7 +2539,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
-       [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
+       [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
        [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
        [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
 };
index a4631cb..a2935bd 100644 (file)
@@ -401,18 +401,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 {
        union tpacket_uhdr h;
 
+       /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
+
        h.raw = frame;
        switch (po->tp_version) {
        case TPACKET_V1:
-               h.h1->tp_status = status;
+               WRITE_ONCE(h.h1->tp_status, status);
                flush_dcache_page(pgv_to_page(&h.h1->tp_status));
                break;
        case TPACKET_V2:
-               h.h2->tp_status = status;
+               WRITE_ONCE(h.h2->tp_status, status);
                flush_dcache_page(pgv_to_page(&h.h2->tp_status));
                break;
        case TPACKET_V3:
-               h.h3->tp_status = status;
+               WRITE_ONCE(h.h3->tp_status, status);
                flush_dcache_page(pgv_to_page(&h.h3->tp_status));
                break;
        default:
@@ -429,17 +431,19 @@ static int __packet_get_status(const struct packet_sock *po, void *frame)
 
        smp_rmb();
 
+       /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
+
        h.raw = frame;
        switch (po->tp_version) {
        case TPACKET_V1:
                flush_dcache_page(pgv_to_page(&h.h1->tp_status));
-               return h.h1->tp_status;
+               return READ_ONCE(h.h1->tp_status);
        case TPACKET_V2:
                flush_dcache_page(pgv_to_page(&h.h2->tp_status));
-               return h.h2->tp_status;
+               return READ_ONCE(h.h2->tp_status);
        case TPACKET_V3:
                flush_dcache_page(pgv_to_page(&h.h3->tp_status));
-               return h.h3->tp_status;
+               return READ_ONCE(h.h3->tp_status);
        default:
                WARN(1, "TPACKET version not supported.\n");
                BUG();
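
The two comments added above describe the pairing; a minimal userspace sketch of the same idea follows (the macros here are stand-ins for the kernel ones, shown only to illustrate why the accesses are annotated — a plain load could be torn, merged, or re-fetched by the compiler, a volatile access cannot):

#include <stdio.h>

/* Userspace stand-ins for the kernel macros (assumption). */
#define WRITE_ONCE(x, val)  (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)        (*(volatile __typeof__(x) *)&(x))

static unsigned long tp_status;

int main(void)
{
	WRITE_ONCE(tp_status, 1);                  /* producer publishes the frame status */
	printf("%lu\n", READ_ONCE(tp_status));     /* consumer polls it without a lock */
	return 0;
}
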
index aa6b1fe..e9eaf63 100644 (file)
@@ -1547,10 +1547,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
        return 0;
 }
 
+static bool req_create_or_replace(struct nlmsghdr *n)
+{
+       return (n->nlmsg_flags & NLM_F_CREATE &&
+               n->nlmsg_flags & NLM_F_REPLACE);
+}
+
+static bool req_create_exclusive(struct nlmsghdr *n)
+{
+       return (n->nlmsg_flags & NLM_F_CREATE &&
+               n->nlmsg_flags & NLM_F_EXCL);
+}
+
+static bool req_change(struct nlmsghdr *n)
+{
+       return (!(n->nlmsg_flags & NLM_F_CREATE) &&
+               !(n->nlmsg_flags & NLM_F_REPLACE) &&
+               !(n->nlmsg_flags & NLM_F_EXCL));
+}
+
 /*
  * Create/change qdisc.
  */
-
 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
                           struct netlink_ext_ack *extack)
 {
@@ -1644,27 +1662,35 @@ replay:
                                 *
                                 *   We know, that some child q is already
                                 *   attached to this parent and have choice:
-                                *   either to change it or to create/graft new one.
+                                *   1) change it or 2) create/graft new one.
+                                *   If the requested qdisc kind is different
+                                *   than the existing one, then we choose graft.
+                                *   If they are the same then this is "change"
+                                *   operation - just let it fall through.
                                 *
                                 *   1. We are allowed to create/graft only
-                                *   if CREATE and REPLACE flags are set.
+                                *   if the request explicitly states
+                                *   "please create if it doesn't exist".
                                 *
-                                *   2. If EXCL is set, requestor wanted to say,
-                                *   that qdisc tcm_handle is not expected
+                                *   2. If the request is an exclusive create
+                                *   then the qdisc tcm_handle is not expected
                                 *   to exist, so that we choose create/graft too.
                                 *
                                 *   3. The last case is when no flags are set.
+                                *   This will happen when, for example, the
+                                *   tc utility issues a "change" command.
                                 *   Alas, it is sort of hole in API, we
                                 *   cannot decide what to do unambiguously.
-                                *   For now we select create/graft, if
-                                *   user gave KIND, which does not match existing.
+                                *   For now we select create/graft.
                                 */
-                               if ((n->nlmsg_flags & NLM_F_CREATE) &&
-                                   (n->nlmsg_flags & NLM_F_REPLACE) &&
-                                   ((n->nlmsg_flags & NLM_F_EXCL) ||
-                                    (tca[TCA_KIND] &&
-                                     nla_strcmp(tca[TCA_KIND], q->ops->id))))
-                                       goto create_n_graft;
+                               if (tca[TCA_KIND] &&
+                                   nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+                                       if (req_create_or_replace(n) ||
+                                           req_create_exclusive(n))
+                                               goto create_n_graft;
+                                       else if (req_change(n))
+                                               goto create_n_graft2;
+                               }
                        }
                }
        } else {
@@ -1698,6 +1724,7 @@ create_n_graft:
                NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
                return -ENOENT;
        }
+create_n_graft2:
        if (clid == TC_H_INGRESS) {
                if (dev_ingress_queue(dev)) {
                        q = qdisc_create(dev, dev_ingress_queue(dev),
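
The three helpers and the reworked comment above can be read as a small decision table. A hypothetical userspace sketch of that routing (classify() is not part of the patch, and in tc_modify_qdisc() the graft path is only taken when TCA_KIND differs from the attached qdisc's kind):

#include <stdio.h>
#include <linux/netlink.h>

static const char *classify(unsigned int flags)
{
	int create  = flags & NLM_F_CREATE;
	int replace = flags & NLM_F_REPLACE;
	int excl    = flags & NLM_F_EXCL;

	if (create && replace)
		return "create_or_replace -> create_n_graft";
	if (create && excl)
		return "create_exclusive  -> create_n_graft";
	if (!create && !replace && !excl)
		return "change            -> create_n_graft2";
	return "other             -> fall through to changing the existing qdisc";
}

int main(void)
{
	printf("%s\n", classify(NLM_F_CREATE | NLM_F_REPLACE));
	printf("%s\n", classify(NLM_F_CREATE | NLM_F_EXCL));
	printf("%s\n", classify(0));
	return 0;
}
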
index 9388d98..76f1bce 100644 (file)
@@ -99,7 +99,7 @@ struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
 {
-       sctp_memory_pressure = 1;
+       WRITE_ONCE(sctp_memory_pressure, 1);
 }
 
 
@@ -9479,7 +9479,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
        newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
        newinet->inet_dport = htons(asoc->peer.port);
        newinet->pmtudisc = inet->pmtudisc;
-       newinet->inet_id = get_random_u16();
+       atomic_set(&newinet->inet_id, get_random_u16());
 
        newinet->uc_ttl = inet->uc_ttl;
        newinet->mc_loop = 1;
index 0c013d2..f5834af 100644 (file)
@@ -378,8 +378,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
        sk->sk_state = SMC_INIT;
        sk->sk_destruct = smc_destruct;
        sk->sk_protocol = protocol;
-       WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
-       WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
+       WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
+       WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
        smc = smc_sk(sk);
        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
        INIT_WORK(&smc->connect_work, smc_connect_work);
@@ -436,13 +436,60 @@ out:
        return rc;
 }
 
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
+
+#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
+                            (1UL << SOCK_KEEPOPEN) | \
+                            (1UL << SOCK_LINGER) | \
+                            (1UL << SOCK_BROADCAST) | \
+                            (1UL << SOCK_TIMESTAMP) | \
+                            (1UL << SOCK_DBG) | \
+                            (1UL << SOCK_RCVTSTAMP) | \
+                            (1UL << SOCK_RCVTSTAMPNS) | \
+                            (1UL << SOCK_LOCALROUTE) | \
+                            (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
+                            (1UL << SOCK_RXQ_OVFL) | \
+                            (1UL << SOCK_WIFI_STATUS) | \
+                            (1UL << SOCK_NOFCS) | \
+                            (1UL << SOCK_FILTER_LOCKED) | \
+                            (1UL << SOCK_TSTAMP_NEW))
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+                                    unsigned long mask)
+{
+       struct net *nnet = sock_net(nsk);
+
+       nsk->sk_userlocks = osk->sk_userlocks;
+       if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+               nsk->sk_sndbuf = osk->sk_sndbuf;
+       } else {
+               if (mask == SK_FLAGS_SMC_TO_CLC)
+                       WRITE_ONCE(nsk->sk_sndbuf,
+                                  READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+               else
+                       WRITE_ONCE(nsk->sk_sndbuf,
+                                  2 * READ_ONCE(nnet->smc.sysctl_wmem));
+       }
+       if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+               nsk->sk_rcvbuf = osk->sk_rcvbuf;
+       } else {
+               if (mask == SK_FLAGS_SMC_TO_CLC)
+                       WRITE_ONCE(nsk->sk_rcvbuf,
+                                  READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+               else
+                       WRITE_ONCE(nsk->sk_rcvbuf,
+                                  2 * READ_ONCE(nnet->smc.sysctl_rmem));
+       }
+}
+
 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
                                   unsigned long mask)
 {
        /* options we don't get control via setsockopt for */
        nsk->sk_type = osk->sk_type;
-       nsk->sk_sndbuf = osk->sk_sndbuf;
-       nsk->sk_rcvbuf = osk->sk_rcvbuf;
        nsk->sk_sndtimeo = osk->sk_sndtimeo;
        nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
        nsk->sk_mark = READ_ONCE(osk->sk_mark);
@@ -453,26 +500,10 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
 
        nsk->sk_flags &= ~mask;
        nsk->sk_flags |= osk->sk_flags & mask;
+
+       smc_adjust_sock_bufsizes(nsk, osk, mask);
 }
 
-#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
-                            (1UL << SOCK_KEEPOPEN) | \
-                            (1UL << SOCK_LINGER) | \
-                            (1UL << SOCK_BROADCAST) | \
-                            (1UL << SOCK_TIMESTAMP) | \
-                            (1UL << SOCK_DBG) | \
-                            (1UL << SOCK_RCVTSTAMP) | \
-                            (1UL << SOCK_RCVTSTAMPNS) | \
-                            (1UL << SOCK_LOCALROUTE) | \
-                            (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
-                            (1UL << SOCK_RXQ_OVFL) | \
-                            (1UL << SOCK_WIFI_STATUS) | \
-                            (1UL << SOCK_NOFCS) | \
-                            (1UL << SOCK_FILTER_LOCKED) | \
-                            (1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
 {
        smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -2479,8 +2510,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
                sock_hold(lsk); /* sock_put in smc_listen_work */
                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
                smc_copy_sock_settings_to_smc(new_smc);
-               new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
-               new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
                sock_hold(&new_smc->sk); /* sock_put in passive closing */
                if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
                        sock_put(&new_smc->sk);
index 2eeea4c..1f2b912 100644 (file)
@@ -161,7 +161,7 @@ struct smc_connection {
 
        struct smc_buf_desc     *sndbuf_desc;   /* send buffer descriptor */
        struct smc_buf_desc     *rmb_desc;      /* RMBE descriptor */
-       int                     rmbe_size_short;/* compressed notation */
+       int                     rmbe_size_comp; /* compressed notation */
        int                     rmbe_update_limit;
                                                /* lower limit for consumer
                                                 * cursor update
index b9b8b07..c90d9e5 100644 (file)
@@ -1007,7 +1007,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
                clc->d0.gid =
                        conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
                clc->d0.token = conn->rmb_desc->token;
-               clc->d0.dmbe_size = conn->rmbe_size_short;
+               clc->d0.dmbe_size = conn->rmbe_size_comp;
                clc->d0.dmbe_idx = 0;
                memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
                if (version == SMC_V1) {
@@ -1050,7 +1050,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
                        clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
                        break;
                }
-               clc->r0.rmbe_size = conn->rmbe_size_short;
+               clc->r0.rmbe_size = conn->rmbe_size_comp;
                clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
                        cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
                        cpu_to_be64((u64)sg_dma_address
index 3f465fa..6b78075 100644 (file)
@@ -2309,31 +2309,30 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        struct list_head *buf_list;
-       int bufsize, bufsize_short;
+       int bufsize, bufsize_comp;
        struct rw_semaphore *lock;      /* lock buffer list */
        bool is_dgraded = false;
-       int sk_buf_size;
 
        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
-               sk_buf_size = smc->sk.sk_rcvbuf;
+               bufsize = smc->sk.sk_rcvbuf / 2;
        else
                /* use socket send buffer size (w/o overhead) as start value */
-               sk_buf_size = smc->sk.sk_sndbuf;
+               bufsize = smc->sk.sk_sndbuf / 2;
 
-       for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
-            bufsize_short >= 0; bufsize_short--) {
+       for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
+            bufsize_comp >= 0; bufsize_comp--) {
                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
-                       buf_list = &lgr->rmbs[bufsize_short];
+                       buf_list = &lgr->rmbs[bufsize_comp];
                } else {
                        lock = &lgr->sndbufs_lock;
-                       buf_list = &lgr->sndbufs[bufsize_short];
+                       buf_list = &lgr->sndbufs[bufsize_comp];
                }
-               bufsize = smc_uncompress_bufsize(bufsize_short);
+               bufsize = smc_uncompress_bufsize(bufsize_comp);
 
                /* check for reusable slot in the link group */
-               buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
+               buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
                if (buf_desc) {
                        buf_desc->is_dma_need_sync = 0;
                        SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
@@ -2377,8 +2376,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
        if (is_rmb) {
                conn->rmb_desc = buf_desc;
-               conn->rmbe_size_short = bufsize_short;
-               smc->sk.sk_rcvbuf = bufsize;
+               conn->rmbe_size_comp = bufsize_comp;
+               smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit =
                        smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2386,7 +2385,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                        smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
        } else {
                conn->sndbuf_desc = buf_desc;
-               smc->sk.sk_sndbuf = bufsize;
+               smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
        return 0;
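
A short note on the factor of two used throughout the SMC hunks above: sk_sndbuf/sk_rcvbuf now follow the usual convention of covering data plus bookkeeping overhead, so the usable RMB/send buffer is half of the socket value and the socket value is restored as twice the chosen buffer size. A minimal sketch of that round trip (a simplified illustration, not kernel code):

#include <assert.h>

int main(void)
{
	int sk_rcvbuf = 131072;            /* e.g. 2 * sysctl_rmem            */
	int bufsize   = sk_rcvbuf / 2;     /* usable RMB size, as in the hunk */

	sk_rcvbuf = bufsize * 2;           /* written back after allocation   */
	assert(sk_rcvbuf == 131072);
	return 0;
}
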
index b6f79fa..0b2a957 100644 (file)
 
 static int min_sndbuf = SMC_BUF_MIN_SIZE;
 static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+static int max_sndbuf = INT_MAX / 2;
+static int max_rcvbuf = INT_MAX / 2;
+static const int net_smc_wmem_init = (64 * 1024);
+static const int net_smc_rmem_init = (64 * 1024);
 
 static struct ctl_table smc_table[] = {
        {
@@ -53,6 +57,7 @@ static struct ctl_table smc_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &min_sndbuf,
+               .extra2         = &max_sndbuf,
        },
        {
                .procname       = "rmem",
@@ -61,6 +66,7 @@ static struct ctl_table smc_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &min_rcvbuf,
+               .extra2         = &max_rcvbuf,
        },
        {  }
 };
@@ -88,8 +94,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
        net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
        net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
        net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
-       WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
-       WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+       WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
+       WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
 
        return 0;
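
The INT_MAX / 2 caps introduced above exist because the socket code stores twice the sysctl value in the int-sized sk_sndbuf/sk_rcvbuf fields; a tiny sketch of the overflow bound (an illustration of the arithmetic, not kernel code):

#include <assert.h>
#include <limits.h>

int main(void)
{
	int max_sndbuf = INT_MAX / 2;

	/* Doubling the largest allowed sysctl value must still fit in an int. */
	assert(2LL * max_sndbuf <= INT_MAX);
	return 0;
}
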
 
index e43f263..2eb8df4 100644 (file)
@@ -1244,6 +1244,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
        if (ret != head->iov_len)
                goto out;
 
+       if (xdr_buf_pagecount(xdr))
+               xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base);
+
        msg.msg_flags = MSG_SPLICE_PAGES;
        iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec,
                      xdr_buf_pagecount(xdr), xdr->page_len);
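
The added line above anchors the first bvec at the data's offset within its page; a hedged arithmetic sketch (PAGE_SIZE and the example value are assumptions, not taken from the patch):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define offset_in_page(p) ((unsigned long)(p) & (PAGE_SIZE - 1))

int main(void)
{
	unsigned long page_base = 5000;                 /* hypothetical xdr->page_base */
	printf("%lu\n", offset_in_page(page_base));     /* 904: where the payload starts in its page */
	return 0;
}
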
index b098fde..28c0771 100644 (file)
@@ -935,9 +935,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
        if (!rep->rr_rdmabuf)
                goto out_free;
 
-       if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
-               goto out_free_regbuf;
-
        rep->rr_cid.ci_completion_id =
                atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
 
@@ -956,8 +953,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
        spin_unlock(&buf->rb_lock);
        return rep;
 
-out_free_regbuf:
-       rpcrdma_regbuf_free(rep->rr_rdmabuf);
 out_free:
        kfree(rep);
 out:
@@ -1363,6 +1358,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
                        rep = rpcrdma_rep_create(r_xprt, temp);
                if (!rep)
                        break;
+               if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
+                       rpcrdma_rep_put(buf, rep);
+                       break;
+               }
 
                rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
                trace_xprtrdma_post_recv(rep);
index 2021fe5..529101e 100644 (file)
@@ -52,6 +52,8 @@ static LIST_HEAD(tls_device_list);
 static LIST_HEAD(tls_device_down_list);
 static DEFINE_SPINLOCK(tls_device_lock);
 
+static struct page *dummy_page;
+
 static void tls_device_free_ctx(struct tls_context *ctx)
 {
        if (ctx->tx_conf == TLS_HW) {
@@ -312,36 +314,33 @@ static int tls_push_record(struct sock *sk,
        return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
-static int tls_device_record_close(struct sock *sk,
-                                  struct tls_context *ctx,
-                                  struct tls_record_info *record,
-                                  struct page_frag *pfrag,
-                                  unsigned char record_type)
+static void tls_device_record_close(struct sock *sk,
+                                   struct tls_context *ctx,
+                                   struct tls_record_info *record,
+                                   struct page_frag *pfrag,
+                                   unsigned char record_type)
 {
        struct tls_prot_info *prot = &ctx->prot_info;
-       int ret;
+       struct page_frag dummy_tag_frag;
 
        /* append tag
         * device will fill in the tag, we just need to append a placeholder
         * use socket memory to improve coalescing (re-using a single buffer
         * increases frag count)
-        * if we can't allocate memory now, steal some back from data
+        * if we can't allocate memory now, use the dummy page
         */
-       if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
-                                       sk->sk_allocation))) {
-               ret = 0;
-               tls_append_frag(record, pfrag, prot->tag_size);
-       } else {
-               ret = prot->tag_size;
-               if (record->len <= prot->overhead_size)
-                       return -ENOMEM;
+       if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) &&
+           !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) {
+               dummy_tag_frag.page = dummy_page;
+               dummy_tag_frag.offset = 0;
+               pfrag = &dummy_tag_frag;
        }
+       tls_append_frag(record, pfrag, prot->tag_size);
 
        /* fill prepend */
        tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
                         record->len - prot->overhead_size,
                         record_type);
-       return ret;
 }
 
 static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
@@ -541,18 +540,8 @@ last_record:
 
                if (done || record->len >= max_open_record_len ||
                    (record->num_frags >= MAX_SKB_FRAGS - 1)) {
-                       rc = tls_device_record_close(sk, tls_ctx, record,
-                                                    pfrag, record_type);
-                       if (rc) {
-                               if (rc > 0) {
-                                       size += rc;
-                               } else {
-                                       size = orig_size;
-                                       destroy_record(record);
-                                       ctx->open_record = NULL;
-                                       break;
-                               }
-                       }
+                       tls_device_record_close(sk, tls_ctx, record,
+                                               pfrag, record_type);
 
                        rc = tls_push_record(sk,
                                             tls_ctx,
@@ -1450,14 +1439,26 @@ int __init tls_device_init(void)
 {
        int err;
 
-       destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
-       if (!destruct_wq)
+       dummy_page = alloc_page(GFP_KERNEL);
+       if (!dummy_page)
                return -ENOMEM;
 
+       destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+       if (!destruct_wq) {
+               err = -ENOMEM;
+               goto err_free_dummy;
+       }
+
        err = register_netdevice_notifier(&tls_dev_notifier);
        if (err)
-               destroy_workqueue(destruct_wq);
+               goto err_destroy_wq;
 
+       return 0;
+
+err_destroy_wq:
+       destroy_workqueue(destruct_wq);
+err_free_dummy:
+       put_page(dummy_page);
        return err;
 }
 
@@ -1466,4 +1467,5 @@ void __exit tls_device_cleanup(void)
        unregister_netdevice_notifier(&tls_dev_notifier);
        destroy_workqueue(destruct_wq);
        clean_acked_data_flush();
+       put_page(dummy_page);
 }
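
A brief sketch of the fallback described in the rewritten comment above: when the socket's page_frag cannot supply prot->tag_size bytes, a local frag is pointed at the preallocated dummy page so appending the placeholder tag can no longer fail. The mock below is a userspace illustration under that assumption, not the driver path:

#include <stddef.h>
#include <stdio.h>

struct frag { char *buf; size_t offset, size; };

static char dummy_page[4096];

static struct frag *pick_tag_frag(struct frag *pfrag, size_t tag_size,
				  struct frag *dummy)
{
	if (pfrag->size - pfrag->offset >= tag_size)
		return pfrag;                     /* enough room: reuse socket memory */
	dummy->buf = dummy_page;                  /* otherwise use the dummy page */
	dummy->offset = 0;
	dummy->size = sizeof(dummy_page);
	return dummy;
}

int main(void)
{
	struct frag sock_frag = { dummy_page, 4090, 4096 }, scratch;
	struct frag *f = pick_tag_frag(&sock_frag, 16, &scratch);

	printf("%s\n", f == &sock_frag ? "socket frag" : "dummy page");  /* dummy page */
	return 0;
}
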
index b689612..4a8ee2f 100644 (file)
@@ -139,9 +139,6 @@ int tls_push_sg(struct sock *sk,
 
        ctx->splicing_pages = true;
        while (1) {
-               if (sg_is_last(sg))
-                       msg.msg_flags = flags;
-
                /* is sending application-limited? */
                tcp_rate_check_app_limited(sk);
                p = sg_page(sg);
index 0da2e6a..8bcf8e2 100644 (file)
@@ -5430,8 +5430,11 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
        if (!wiphy->mbssid_max_interfaces)
                return ERR_PTR(-EINVAL);
 
-       nla_for_each_nested(nl_elems, attrs, rem_elems)
+       nla_for_each_nested(nl_elems, attrs, rem_elems) {
+               if (num_elems >= 255)
+                       return ERR_PTR(-EINVAL);
                num_elems++;
+       }
 
        elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
        if (!elems)
index b89adb5..10ea85c 100644 (file)
@@ -994,6 +994,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                                err = xp_alloc_tx_descs(xs->pool, xs);
                                if (err) {
                                        xp_put_pool(xs->pool);
+                                       xs->pool = NULL;
                                        sockfd_put(sock);
                                        goto out_unlock;
                                }
index 8cbf45a..655fe4f 100644 (file)
@@ -108,7 +108,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
        [XFRMA_ALG_COMP]        = { .len = sizeof(struct xfrm_algo) },
        [XFRMA_ENCAP]           = { .len = sizeof(struct xfrm_encap_tmpl) },
        [XFRMA_TMPL]            = { .len = sizeof(struct xfrm_user_tmpl) },
-       [XFRMA_SEC_CTX]         = { .len = sizeof(struct xfrm_sec_ctx) },
+       [XFRMA_SEC_CTX]         = { .len = sizeof(struct xfrm_user_sec_ctx) },
        [XFRMA_LTIME_VAL]       = { .len = sizeof(struct xfrm_lifetime_cur) },
        [XFRMA_REPLAY_VAL]      = { .len = sizeof(struct xfrm_replay_state) },
        [XFRMA_REPLAY_THRESH]   = { .type = NLA_U32 },
index 815b380..d5ee967 100644 (file)
@@ -180,6 +180,8 @@ static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
        int optlen = 0;
        int err = -EINVAL;
 
+       skb->protocol = htons(ETH_P_IP);
+
        if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
                struct ip_beet_phdr *ph;
                int phlen;
@@ -232,6 +234,8 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
 {
        int err = -EINVAL;
 
+       skb->protocol = htons(ETH_P_IP);
+
        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                goto out;
 
@@ -267,6 +271,8 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
 {
        int err = -EINVAL;
 
+       skb->protocol = htons(ETH_P_IPV6);
+
        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                goto out;
 
@@ -296,6 +302,8 @@ static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
        int size = sizeof(struct ipv6hdr);
        int err;
 
+       skb->protocol = htons(ETH_P_IPV6);
+
        err = skb_cow_head(skb, size + skb->mac_len);
        if (err)
                goto out;
@@ -346,6 +354,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
                        return xfrm6_remove_tunnel_encap(x, skb);
                break;
                }
+               return -EINVAL;
        }
 
        WARN_ON_ONCE(1);
@@ -366,19 +375,6 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
                return -EAFNOSUPPORT;
        }
 
-       switch (XFRM_MODE_SKB_CB(skb)->protocol) {
-       case IPPROTO_IPIP:
-       case IPPROTO_BEETPH:
-               skb->protocol = htons(ETH_P_IP);
-               break;
-       case IPPROTO_IPV6:
-               skb->protocol = htons(ETH_P_IPV6);
-               break;
-       default:
-               WARN_ON_ONCE(1);
-               break;
-       }
-
        return xfrm_inner_mode_encap_remove(x, skb);
 }
 
index a331996..b864740 100644 (file)
@@ -537,8 +537,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
 
        switch (skb->protocol) {
        case htons(ETH_P_IPV6):
-               xfrm_decode_session(skb, &fl, AF_INET6);
                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET6);
                if (!dst) {
                        fl.u.ip6.flowi6_oif = dev->ifindex;
                        fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
@@ -552,8 +552,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
                }
                break;
        case htons(ETH_P_IP):
-               xfrm_decode_session(skb, &fl, AF_INET);
                memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET);
                if (!dst) {
                        struct rtable *rt;
 
index 49e63ee..bda5327 100644 (file)
@@ -1324,12 +1324,8 @@ found:
                        struct xfrm_dev_offload *xso = &x->xso;
 
                        if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
-                               xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
-                               xso->dir = 0;
-                               netdev_put(xso->dev, &xso->dev_tracker);
-                               xso->dev = NULL;
-                               xso->real_dev = NULL;
-                               xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+                               xfrm_dev_state_delete(x);
+                               xfrm_dev_state_free(x);
                        }
 #endif
                        x->km.state = XFRM_STATE_DEAD;
index c34a2a0..ad01997 100644 (file)
@@ -628,7 +628,7 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
        struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
        struct nlattr *mt = attrs[XFRMA_MTIMER_THRESH];
 
-       if (re) {
+       if (re && x->replay_esn && x->preplay_esn) {
                struct xfrm_replay_state_esn *replay_esn;
                replay_esn = nla_data(re);
                memcpy(x->replay_esn, replay_esn,
@@ -1267,6 +1267,15 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
                                         sizeof(*filter), GFP_KERNEL);
                        if (filter == NULL)
                                return -ENOMEM;
+
+                       /* see addr_match(), (prefix length >> 5) << 2
+                        * will be used to compare xfrm_address_t
+                        */
+                       if (filter->splen > (sizeof(xfrm_address_t) << 3) ||
+                           filter->dplen > (sizeof(xfrm_address_t) << 3)) {
+                               kfree(filter);
+                               return -EINVAL;
+                       }
                }
 
                if (attrs[XFRMA_PROTO])
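
The new bound follows from the size of xfrm_address_t, as the in-line comment notes; a minimal check of the arithmetic (assuming the 16-byte address union, large enough for IPv6):

#include <assert.h>

int main(void)
{
	/* A 16-byte address allows at most 16 * 8 = 128 prefix bits;
	 * anything larger would make addr_match() read past the address.
	 */
	int max_prefix = 16 << 3;
	assert(max_prefix == 128);
	return 0;
}
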
@@ -2336,6 +2345,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
                                            NETLINK_CB(skb).portid);
                }
        } else {
+               xfrm_dev_policy_delete(xp);
                xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
 
                if (err != 0)
@@ -3015,7 +3025,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
        [XFRMA_ALG_COMP]        = { .len = sizeof(struct xfrm_algo) },
        [XFRMA_ENCAP]           = { .len = sizeof(struct xfrm_encap_tmpl) },
        [XFRMA_TMPL]            = { .len = sizeof(struct xfrm_user_tmpl) },
-       [XFRMA_SEC_CTX]         = { .len = sizeof(struct xfrm_sec_ctx) },
+       [XFRMA_SEC_CTX]         = { .len = sizeof(struct xfrm_user_sec_ctx) },
        [XFRMA_LTIME_VAL]       = { .len = sizeof(struct xfrm_lifetime_cur) },
        [XFRMA_REPLAY_VAL]      = { .len = sizeof(struct xfrm_replay_state) },
        [XFRMA_REPLAY_THRESH]   = { .type = NLA_U32 },
@@ -3035,6 +3045,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
        [XFRMA_SET_MARK]        = { .type = NLA_U32 },
        [XFRMA_SET_MARK_MASK]   = { .type = NLA_U32 },
        [XFRMA_IF_ID]           = { .type = NLA_U32 },
+       [XFRMA_MTIMER_THRESH]   = { .type = NLA_U32 },
 };
 EXPORT_SYMBOL_GPL(xfrma_policy);
 
index 34d5e7f..ee06044 100644 (file)
@@ -74,6 +74,7 @@ pub(crate) fn vtable(_attr: TokenStream, ts: TokenStream) -> TokenStream {
                 const {gen_const_name}: bool = false;",
             )
             .unwrap();
+            consts.insert(gen_const_name);
         }
     } else {
         const_items = "const USE_VTABLE_ATTR: () = ();".to_owned();
index e5ed080..e2a6a69 100644 (file)
@@ -105,7 +105,7 @@ asm (
 "      .type           my_tramp1, @function\n"
 "      .globl          my_tramp1\n"
 "   my_tramp1:"
-"      bti     c\n"
+"      hint    34\n" // bti    c
 "      sub     sp, sp, #16\n"
 "      stp     x9, x30, [sp]\n"
 "      bl      my_direct_func1\n"
@@ -117,7 +117,7 @@ asm (
 "      .type           my_tramp2, @function\n"
 "      .globl          my_tramp2\n"
 "   my_tramp2:"
-"      bti     c\n"
+"      hint    34\n" // bti    c
 "      sub     sp, sp, #16\n"
 "      stp     x9, x30, [sp]\n"
 "      bl      my_direct_func2\n"
index 292cff2..2e34983 100644 (file)
@@ -112,7 +112,7 @@ asm (
 "      .type           my_tramp1, @function\n"
 "      .globl          my_tramp1\n"
 "   my_tramp1:"
-"      bti     c\n"
+"      hint    34\n" // bti    c
 "      sub     sp, sp, #32\n"
 "      stp     x9, x30, [sp]\n"
 "      str     x0, [sp, #16]\n"
@@ -127,7 +127,7 @@ asm (
 "      .type           my_tramp2, @function\n"
 "      .globl          my_tramp2\n"
 "   my_tramp2:"
-"      bti     c\n"
+"      hint    34\n" // bti    c
 "      sub     sp, sp, #32\n"
 "      stp     x9, x30, [sp]\n"
 "      str     x0, [sp, #16]\n"
index b4391e0..9243dbf 100644 (file)
@@ -75,7 +75,7 @@ asm (
 "      .type           my_tramp, @function\n"
 "      .globl          my_tramp\n"
 "   my_tramp:"
-"      bti     c\n"
+"      hint    34\n" // bti    c
 "      sub     sp, sp, #32\n"
 "      stp     x9, x30, [sp]\n"
 "      str     x0, [sp, #16]\n"
index e9804c5..e39c356 100644 (file)
@@ -81,7 +81,7 @@ asm (
 "      .type           my_tramp, @function\n"
 "      .globl          my_tramp\n"
 "   my_tramp:"
-"      bti     c\n"
+"      hint    34\n" // bti    c
 "      sub     sp, sp, #48\n"
 "      stp     x9, x30, [sp]\n"
 "      stp     x0, x1, [sp, #16]\n"
index 20f4a7c..32c477d 100644 (file)
@@ -72,7 +72,7 @@ asm (
 "      .type           my_tramp, @function\n"
 "      .globl          my_tramp\n"
 "   my_tramp:"
-"      bti     c\n"
+"      hint    34\n" // bti    c
 "      sub     sp, sp, #32\n"
 "      stp     x9, x30, [sp]\n"
 "      str     x0, [sp, #16]\n"
index 16c8793..653b92f 100644 (file)
@@ -129,6 +129,7 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len)
        ssize_t readlen;
        struct sym_entry *sym;
 
+       errno = 0;
        readlen = getline(buf, buf_len, in);
        if (readlen < 0) {
                if (errno) {
index b72b82b..b348e16 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/sysctl.h>
 #include "internal.h"
 
-struct ctl_table key_sysctls[] = {
+static struct ctl_table key_sysctls[] = {
        {
                .procname = "maxkeys",
                .data = &key_quota_maxkeys,
index 31b08b3..dc90486 100644 (file)
@@ -2005,6 +2005,7 @@ static int filename_trans_read_helper(struct policydb *p, void *fp)
                if (!datum)
                        goto out;
 
+               datum->next = NULL;
                *dst = datum;
 
                /* ebitmap_read() will at least init the bitmap */
@@ -2017,7 +2018,6 @@ static int filename_trans_read_helper(struct policydb *p, void *fp)
                        goto out;
 
                datum->otype = le32_to_cpu(buf[0]);
-               datum->next = NULL;
 
                dst = &datum->next;
        }
index b288874..36b411d 100644 (file)
@@ -550,6 +550,10 @@ const struct snd_pci_quirk cs8409_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0C50, "Dolphin", CS8409_DOLPHIN),
        SND_PCI_QUIRK(0x1028, 0x0C51, "Dolphin", CS8409_DOLPHIN),
        SND_PCI_QUIRK(0x1028, 0x0C52, "Dolphin", CS8409_DOLPHIN),
+       SND_PCI_QUIRK(0x1028, 0x0C73, "Dolphin", CS8409_DOLPHIN),
+       SND_PCI_QUIRK(0x1028, 0x0C75, "Dolphin", CS8409_DOLPHIN),
+       SND_PCI_QUIRK(0x1028, 0x0C7D, "Dolphin", CS8409_DOLPHIN),
+       SND_PCI_QUIRK(0x1028, 0x0C7F, "Dolphin", CS8409_DOLPHIN),
        {} /* terminator */
 };
 
index 44fccfb..dc7b7a4 100644 (file)
@@ -9422,11 +9422,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1028, 0x0cbf, "Dell Oasis 13 Low Weight MTU-L", ALC245_FIXUP_CS35L41_SPI_2),
-       SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x1028, 0x0cc5, "Dell Oasis MLK 14 RPL-P", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -9617,7 +9616,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8b96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
        SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
        SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED),
-       SND_PCI_QUIRK(0x103c, 0x8c26, "HP HP EliteBook 800G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c46, "HP EliteBook 830 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c47, "HP EliteBook 840 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c48, "HP EliteBook 860 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c49, "HP Elite x360 830 2-in-1 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
        SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -10638,6 +10643,7 @@ static int patch_alc269(struct hda_codec *codec)
        spec = codec->spec;
        spec->gen.shared_mic_vref_pin = 0x18;
        codec->power_save_node = 0;
+       spec->en_3kpull_low = true;
 
 #ifdef CONFIG_PM
        codec->patch_ops.suspend = alc269_suspend;
@@ -10720,14 +10726,16 @@ static int patch_alc269(struct hda_codec *codec)
                spec->shutup = alc256_shutup;
                spec->init_hook = alc256_init;
                spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
-               if (codec->bus->pci->vendor == PCI_VENDOR_ID_AMD)
-                       spec->en_3kpull_low = true;
+               if (codec->core.vendor_id == 0x10ec0236 &&
+                   codec->bus->pci->vendor != PCI_VENDOR_ID_AMD)
+                       spec->en_3kpull_low = false;
                break;
        case 0x10ec0257:
                spec->codec_variant = ALC269_TYPE_ALC257;
                spec->shutup = alc256_shutup;
                spec->init_hook = alc256_init;
                spec->gen.mixer_nid = 0;
+               spec->en_3kpull_low = false;
                break;
        case 0x10ec0215:
        case 0x10ec0245:
index b033bd2..48444dd 100644 (file)
@@ -152,8 +152,8 @@ static inline int snd_ymfpci_create_gameport(struct snd_ymfpci *chip, int dev, i
 void snd_ymfpci_free_gameport(struct snd_ymfpci *chip) { }
 #endif /* SUPPORT_JOYSTICK */
 
-static int snd_card_ymfpci_probe(struct pci_dev *pci,
-                                const struct pci_device_id *pci_id)
+static int __snd_card_ymfpci_probe(struct pci_dev *pci,
+                                  const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -348,6 +348,12 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_card_ymfpci_probe(struct pci_dev *pci,
+                                const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_card_ymfpci_probe(pci, pci_id));
+}
+
 static struct pci_driver ymfpci_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_ymfpci_ids,
index a2fe3bd..b304b35 100644 (file)
@@ -217,7 +217,7 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                .driver_data = &acp6x_card,
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "82"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "82V2"),
                }
        },
        {
@@ -251,6 +251,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
        {
                .driver_data = &acp6x_card,
                .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "M6500RC"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"),
                }
index 6ac501f..8a879b6 100644 (file)
@@ -168,7 +168,7 @@ static int cs35l41_get_fs_mon_config_index(int freq)
 static const DECLARE_TLV_DB_RANGE(dig_vol_tlv,
                0, 0, TLV_DB_SCALE_ITEM(TLV_DB_GAIN_MUTE, 0, 1),
                1, 913, TLV_DB_MINMAX_ITEM(-10200, 1200));
-static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1);
+static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 50, 100, 0);
 
 static const struct snd_kcontrol_new dre_ctrl =
        SOC_DAPM_SINGLE("Switch", CS35L41_PWR_CTRL3, 20, 1, 0);
index ed2a419..40666e6 100644 (file)
@@ -62,10 +62,19 @@ static const struct i2c_device_id cs35l56_id_i2c[] = {
 };
 MODULE_DEVICE_TABLE(i2c, cs35l56_id_i2c);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cs35l56_asoc_acpi_match[] = {
+       { "CSC355C", 0 },
+       {},
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_asoc_acpi_match);
+#endif
+
 static struct i2c_driver cs35l56_i2c_driver = {
        .driver = {
                .name           = "cs35l56",
                .pm = &cs35l56_pm_ops_i2c_spi,
+               .acpi_match_table = ACPI_PTR(cs35l56_asoc_acpi_match),
        },
        .id_table       = cs35l56_id_i2c,
        .probe          = cs35l56_i2c_probe,
index 996aab1..302f9c4 100644 (file)
@@ -59,10 +59,19 @@ static const struct spi_device_id cs35l56_id_spi[] = {
 };
 MODULE_DEVICE_TABLE(spi, cs35l56_id_spi);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cs35l56_asoc_acpi_match[] = {
+       { "CSC355C", 0 },
+       {},
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_asoc_acpi_match);
+#endif
+
 static struct spi_driver cs35l56_spi_driver = {
        .driver = {
                .name           = "cs35l56",
                .pm = &cs35l56_pm_ops_i2c_spi,
+               .acpi_match_table = ACPI_PTR(cs35l56_asoc_acpi_match),
        },
        .id_table       = cs35l56_id_spi,
        .probe          = cs35l56_spi_probe,
index c03f9d3..fd06b9f 100644 (file)
@@ -5,7 +5,6 @@
 // Copyright (C) 2023 Cirrus Logic, Inc. and
 //                    Cirrus Logic International Semiconductor Ltd.
 
-#include <linux/acpi.h>
 #include <linux/completion.h>
 #include <linux/debugfs.h>
 #include <linux/delay.h>
@@ -1354,26 +1353,22 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56)
        return 0;
 }
 
-static int cs35l56_acpi_get_name(struct cs35l56_private *cs35l56)
+static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56)
 {
-       acpi_handle handle = ACPI_HANDLE(cs35l56->dev);
-       const char *sub;
+       struct device *dev = cs35l56->dev;
+       const char *prop;
+       int ret;
 
-       /* If there is no ACPI_HANDLE, there is no ACPI for this system, return 0 */
-       if (!handle)
+       ret = device_property_read_string(dev, "cirrus,firmware-uid", &prop);
+       /* If bad sw node property, return 0 and fall back to the legacy firmware path */
+       if (ret < 0)
                return 0;
 
-       sub = acpi_get_subsystem_id(handle);
-       if (IS_ERR(sub)) {
-               /* If bad ACPI, return 0 and fallback to legacy firmware path, otherwise fail */
-               if (PTR_ERR(sub) == -ENODATA)
-                       return 0;
-               else
-                       return PTR_ERR(sub);
-       }
+       cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL);
+       if (cs35l56->dsp.system_name == NULL)
+               return -ENOMEM;
 
-       cs35l56->dsp.system_name = sub;
-       dev_dbg(cs35l56->dev, "Subsystem ID: %s\n", cs35l56->dsp.system_name);
+       dev_dbg(dev, "Firmware UID: %s\n", cs35l56->dsp.system_name);
 
        return 0;
 }
@@ -1417,7 +1412,7 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56)
                gpiod_set_value_cansleep(cs35l56->reset_gpio, 1);
        }
 
-       ret = cs35l56_acpi_get_name(cs35l56);
+       ret = cs35l56_get_firmware_uid(cs35l56);
        if (ret != 0)
                goto err;
 
@@ -1604,8 +1599,6 @@ void cs35l56_remove(struct cs35l56_private *cs35l56)
 
        regcache_cache_only(cs35l56->regmap, true);
 
-       kfree(cs35l56->dsp.system_name);
-
        gpiod_set_value_cansleep(cs35l56->reset_gpio, 0);
        regulator_bulk_disable(ARRAY_SIZE(cs35l56->supplies), cs35l56->supplies);
 }
index b5c69bb..2dfaf4f 100644 (file)
@@ -185,10 +185,10 @@ static int max98363_io_init(struct sdw_slave *slave)
        pm_runtime_get_noresume(dev);
 
        ret = regmap_read(max98363->regmap, MAX98363_R21FF_REV_ID, &reg);
-       if (!ret) {
+       if (!ret)
                dev_info(dev, "Revision ID: %X\n", reg);
-               return ret;
-       }
+       else
+               goto out;
 
        if (max98363->first_hw_init) {
                regcache_cache_bypass(max98363->regmap, false);
@@ -198,10 +198,11 @@ static int max98363_io_init(struct sdw_slave *slave)
        max98363->first_hw_init = true;
        max98363->hw_init = true;
 
+out:
        pm_runtime_mark_last_busy(dev);
        pm_runtime_put_autosuspend(dev);
 
-       return 0;
+       return ret;
 }
 
 #define MAX98363_RATES SNDRV_PCM_RATE_8000_192000
index f43520c..e566c8d 100644 (file)
@@ -52,6 +52,7 @@ static bool rt1308_volatile_register(struct device *dev, unsigned int reg)
        case 0x300a:
        case 0xc000:
        case 0xc710:
+       case 0xcf01:
        case 0xc860 ... 0xc863:
        case 0xc870 ... 0xc873:
                return true;
@@ -213,7 +214,7 @@ static int rt1308_io_init(struct device *dev, struct sdw_slave *slave)
 {
        struct rt1308_sdw_priv *rt1308 = dev_get_drvdata(dev);
        int ret = 0;
-       unsigned int tmp;
+       unsigned int tmp, hibernation_flag;
 
        if (rt1308->hw_init)
                return 0;
@@ -242,6 +243,10 @@ static int rt1308_io_init(struct device *dev, struct sdw_slave *slave)
 
        pm_runtime_get_noresume(&slave->dev);
 
+       regmap_read(rt1308->regmap, 0xcf01, &hibernation_flag);
+       if ((hibernation_flag != 0x00) && rt1308->first_hw_init)
+               goto _preset_ready_;
+
        /* sw reset */
        regmap_write(rt1308->regmap, RT1308_SDW_RESET, 0);
 
@@ -282,6 +287,12 @@ static int rt1308_io_init(struct device *dev, struct sdw_slave *slave)
        regmap_write(rt1308->regmap, 0xc100, 0xd7);
        regmap_write(rt1308->regmap, 0xc101, 0xd7);
 
+       /* apply BQ params */
+       rt1308_apply_bq_params(rt1308);
+
+       regmap_write(rt1308->regmap, 0xcf01, 0x01);
+
+_preset_ready_:
        if (rt1308->first_hw_init) {
                regcache_cache_bypass(rt1308->regmap, false);
                regcache_mark_dirty(rt1308->regmap);
index 83c367a..525713c 100644 (file)
@@ -4472,6 +4472,8 @@ static void rt5665_remove(struct snd_soc_component *component)
        struct rt5665_priv *rt5665 = snd_soc_component_get_drvdata(component);
 
        regmap_write(rt5665->regmap, RT5665_RESET, 0);
+
+       regulator_bulk_disable(ARRAY_SIZE(rt5665->supplies), rt5665->supplies);
 }
 
 #ifdef CONFIG_PM
index a88c6c2..ffb26e4 100644 (file)
@@ -57,16 +57,17 @@ static int tasdevice_change_chn_book(struct tasdevice_priv *tas_priv,
 
                if (client->addr != tasdev->dev_addr) {
                        client->addr = tasdev->dev_addr;
-                       if (tasdev->cur_book == book) {
-                               ret = regmap_write(map,
-                                       TASDEVICE_PAGE_SELECT, 0);
-                               if (ret < 0) {
-                                       dev_err(tas_priv->dev, "%s, E=%d\n",
-                                               __func__, ret);
-                                       goto out;
-                               }
+                       /* All tas2781s share the same regmap, so clear the
+                        * cached page in the regmap whenever switching to
+                        * another tas2781. Register 0 is the page-select
+                        * register and is the same on every page and book.
+                        */
+                       ret = regmap_write(map, TASDEVICE_PAGE_SELECT, 0);
+                       if (ret < 0) {
+                               dev_err(tas_priv->dev, "%s, E=%d\n",
+                                       __func__, ret);
+                               goto out;
                        }
-                       goto out;
                }
 
                if (tasdev->cur_book != book) {
index 3f08082..9d01225 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 // Copyright 2018 NXP
 
 #include <linux/bitfield.h>
@@ -1254,4 +1254,4 @@ module_platform_driver(fsl_micfil_driver);
 
 MODULE_AUTHOR("Cosmin-Gabriel Samoila <cosmin.samoila@nxp.com>");
 MODULE_DESCRIPTION("NXP PDM Microphone Interface (MICFIL) driver");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("Dual BSD/GPL");
index 9237a1c..fee9fe3 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
  * PDM Microphone Interface for the NXP i.MX SoC
  * Copyright 2018 NXP
index dbee8c9..0201029 100644 (file)
@@ -476,7 +476,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Lunar Lake Client Platform"),
                },
-               .driver_data = (void *)(RT711_JD2_100K),
+               .driver_data = (void *)(RT711_JD2),
        },
        {}
 };
index c4a16e4..ad130d9 100644 (file)
@@ -99,9 +99,9 @@ static int cs42l42_rtd_init(struct snd_soc_pcm_runtime *rtd)
        jack = &ctx->sdw_headset;
 
        snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_PLAYPAUSE);
-       snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOICECOMMAND);
-       snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEUP);
-       snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN);
+       snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOLUMEUP);
+       snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEDOWN);
+       snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOICECOMMAND);
 
        ret = snd_soc_component_set_jack(component, jack, NULL);
 
index 9883dc7..63333a2 100644 (file)
@@ -30,27 +30,32 @@ int axg_tdm_formatter_set_channel_masks(struct regmap *map,
                                        struct axg_tdm_stream *ts,
                                        unsigned int offset)
 {
-       unsigned int val, ch = ts->channels;
-       unsigned long mask;
-       int i, j;
+       unsigned int ch = ts->channels;
+       u32 val[AXG_TDM_NUM_LANES];
+       int i, j, k;
+
+       /*
+        * We need to mimic the slot distribution used by the HW to keep the
+        * channel placement consistent regardless of the number of channels
+        * in the stream. This is why the odd algorithm below is used.
+        */
+       memset(val, 0, sizeof(*val) * AXG_TDM_NUM_LANES);
 
        /*
         * Distribute the channels of the stream over the available slots
-        * of each TDM lane
+        * of each TDM lane. We need to go over the 32 slots ...
         */
-       for (i = 0; i < AXG_TDM_NUM_LANES; i++) {
-               val = 0;
-               mask = ts->mask[i];
-
-               for (j = find_first_bit(&mask, 32);
-                    (j < 32) && ch;
-                    j = find_next_bit(&mask, 32, j + 1)) {
-                       val |= 1 << j;
-                       ch -= 1;
+       for (i = 0; (i < 32) && ch; i += 2) {
+               /* ... of all the lanes ... */
+               for (j = 0; j < AXG_TDM_NUM_LANES; j++) {
+                       /* ... then distribute the channels in pairs */
+                       for (k = 0; k < 2; k++) {
+                               if ((BIT(i + k) & ts->mask[j]) && ch) {
+                                       val[j] |= BIT(i + k);
+                                       ch -= 1;
+                               }
+                       }
                }
-
-               regmap_write(map, offset, val);
-               offset += regmap_get_reg_stride(map);
        }
 
        /*
@@ -63,6 +68,11 @@ int axg_tdm_formatter_set_channel_masks(struct regmap *map,
                return -EINVAL;
        }
 
+       for (i = 0; i < AXG_TDM_NUM_LANES; i++) {
+               regmap_write(map, offset, val[i]);
+               offset += regmap_get_reg_stride(map);
+       }
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(axg_tdm_formatter_set_channel_masks);
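
A standalone sketch of the distribution loop above (assuming two lanes with every slot enabled in ts->mask[]): with four channels it yields bits 0-1 on both lanes, and the placement of the first channels no longer shifts when more channels are added to the stream:

#include <stdint.h>
#include <stdio.h>

#define NUM_LANES 2

int main(void)
{
	uint32_t mask[NUM_LANES] = { 0xffffffff, 0xffffffff };  /* enabled slots per lane */
	uint32_t val[NUM_LANES]  = { 0, 0 };
	int ch = 4;                                              /* channels in the stream */
	int i, j, k;

	for (i = 0; i < 32 && ch; i += 2)                        /* walk the 32 slots ...    */
		for (j = 0; j < NUM_LANES; j++)                  /* ... across all lanes ... */
			for (k = 0; k < 2; k++)                  /* ... in pairs             */
				if ((mask[j] & (1u << (i + k))) && ch) {
					val[j] |= 1u << (i + k);
					ch--;
				}

	printf("lane0=%08x lane1=%08x\n", (unsigned)val[0], (unsigned)val[1]);
	/* prints: lane0=00000003 lane1=00000003 */
	return 0;
}
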
index 8896227..3aa6b98 100644 (file)
@@ -38,6 +38,7 @@ static inline int _soc_pcm_ret(struct snd_soc_pcm_runtime *rtd,
        switch (ret) {
        case -EPROBE_DEFER:
        case -ENOTSUPP:
+       case -EINVAL:
                break;
        default:
                dev_err(rtd->dev,
@@ -2466,8 +2467,11 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream)
 
        /* there is no point preparing this FE if there are no BEs */
        if (list_empty(&fe->dpcm[stream].be_clients)) {
-               dev_err(fe->dev, "ASoC: no backend DAIs enabled for %s\n",
-                               fe->dai_link->name);
+               /* dev_err_once() for visibility, dev_dbg() for debugging UCM profiles */
+               dev_err_once(fe->dev, "ASoC: no backend DAIs enabled for %s, possibly missing ALSA mixer-based routing or UCM profile\n",
+                            fe->dai_link->name);
+               dev_dbg(fe->dev, "ASoC: no backend DAIs enabled for %s\n",
+                       fe->dai_link->name);
                ret = -EINVAL;
                goto out;
        }
index f351379..f33051e 100644 (file)
@@ -372,6 +372,7 @@ static const struct hda_dai_widget_dma_ops hda_ipc4_chain_dma_ops = {
 static int hda_ipc3_post_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *cpu_dai,
                                 struct snd_pcm_substream *substream, int cmd)
 {
+       struct hdac_ext_stream *hext_stream = hda_get_hext_stream(sdev, cpu_dai, substream);
        struct snd_soc_dapm_widget *w = snd_soc_dai_get_widget(cpu_dai, substream->stream);
 
        switch (cmd) {
@@ -379,9 +380,17 @@ static int hda_ipc3_post_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *c
        case SNDRV_PCM_TRIGGER_STOP:
        {
                struct snd_sof_dai_config_data data = { 0 };
+               int ret;
 
                data.dai_data = DMA_CHAN_INVALID;
-               return hda_dai_config(w, SOF_DAI_CONFIG_FLAGS_HW_FREE, &data);
+               ret = hda_dai_config(w, SOF_DAI_CONFIG_FLAGS_HW_FREE, &data);
+               if (ret < 0)
+                       return ret;
+
+               if (cmd == SNDRV_PCM_TRIGGER_STOP)
+                       return hda_link_dma_cleanup(substream, hext_stream, cpu_dai);
+
+               break;
        }
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
                return hda_dai_config(w, SOF_DAI_CONFIG_FLAGS_PAUSE, NULL);
index 3297dea..863865f 100644 (file)
@@ -107,9 +107,8 @@ hda_dai_get_ops(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai
        return sdai->platform_private;
 }
 
-static int hda_link_dma_cleanup(struct snd_pcm_substream *substream,
-                               struct hdac_ext_stream *hext_stream,
-                               struct snd_soc_dai *cpu_dai)
+int hda_link_dma_cleanup(struct snd_pcm_substream *substream, struct hdac_ext_stream *hext_stream,
+                        struct snd_soc_dai *cpu_dai)
 {
        const struct hda_dai_widget_dma_ops *ops = hda_dai_get_ops(substream, cpu_dai);
        struct sof_intel_hda_stream *hda_stream;
index 3f7c6fb..5b9e4eb 100644 (file)
@@ -963,5 +963,7 @@ const struct hda_dai_widget_dma_ops *
 hda_select_dai_widget_ops(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget);
 int hda_dai_config(struct snd_soc_dapm_widget *w, unsigned int flags,
                   struct snd_sof_dai_config_data *data);
+int hda_link_dma_cleanup(struct snd_pcm_substream *substream, struct hdac_ext_stream *hext_stream,
+                        struct snd_soc_dai *cpu_dai);
 
 #endif
index 2c5aac3..580960f 100644 (file)
@@ -1001,7 +1001,7 @@ void sof_ipc3_do_rx_work(struct snd_sof_dev *sdev, struct sof_ipc_cmd_hdr *hdr,
 
        ipc3_log_header(sdev->dev, "ipc rx", hdr->cmd);
 
-       if (hdr->size < sizeof(hdr) || hdr->size > SOF_IPC_MSG_MAX_SIZE) {
+       if (hdr->size < sizeof(*hdr) || hdr->size > SOF_IPC_MSG_MAX_SIZE) {
                dev_err(sdev->dev, "The received message size is invalid: %u\n",
                        hdr->size);
                return;
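
The one-character fix above matters because hdr is a pointer: sizeof(hdr) is the pointer size, while sizeof(*hdr) is the size of the IPC header the message must at least contain. A small generic illustration of how the wrong operand under-checks the size on a 64-bit build (the struct below is a made-up stand-in, not the real SOF header layout):

    #include <stdint.h>
    #include <stdio.h>

    /* illustrative header: 12 bytes, larger than an 8-byte pointer */
    struct msg_hdr {
            uint32_t size;
            uint32_t cmd;
            uint32_t reserved;
    };

    int main(void)
    {
            struct msg_hdr h = { .size = 10 };  /* truncated message */
            struct msg_hdr *hdr = &h;

            printf("sizeof(hdr) = %zu, sizeof(*hdr) = %zu\n",
                   sizeof(hdr), sizeof(*hdr));
            printf("buggy check rejects: %s\n",
                   hdr->size < sizeof(hdr) ? "yes" : "no");   /* no  */
            printf("fixed check rejects: %s\n",
                   hdr->size < sizeof(*hdr) ? "yes" : "no");  /* yes */
            return 0;
    }
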
index 0c905bd..027416e 100644 (file)
@@ -708,6 +708,9 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component,
        struct snd_sof_pcm *spcm;
 
        spcm = snd_sof_find_spcm_dai(component, rtd);
+       if (!spcm)
+               return -EINVAL;
+
        time_info = spcm->stream[substream->stream].private;
        /* delay calculation is not supported by current fw_reg ABI */
        if (!time_info)
index a4e1a70..11361e1 100644 (file)
@@ -1731,6 +1731,9 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
 
        *ipc_config_size = ipc_size;
 
+       /* update pipeline memory usage */
+       sof_ipc4_update_resource_usage(sdev, swidget, &copier_data->base_config);
+
        /* copy IPC data */
        memcpy(*ipc_config_data, (void *)copier_data, sizeof(*copier_data));
        if (gtw_cfg_config_length)
@@ -1743,9 +1746,6 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
                       gtw_cfg_config_length,
                       &ipc4_copier->dma_config_tlv, dma_config_tlv_size);
 
-       /* update pipeline memory usage */
-       sof_ipc4_update_resource_usage(sdev, swidget, &copier_data->base_config);
-
        return 0;
 }
 
index efb4a33..5d72dc8 100644 (file)
@@ -4507,6 +4507,35 @@ YAMAHA_DEVICE(0x7010, "UB99"),
                }
        }
 },
+{
+       /* Advanced modes of the Mythware XA001AU.
+        * For the standard mode, Mythware XA001AU has ID ffad:a001
+        */
+       USB_DEVICE_VENDOR_SPEC(0xffad, 0xa001),
+       .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+               .vendor_name = "Mythware",
+               .product_name = "XA001AU",
+               .ifnum = QUIRK_ANY_INTERFACE,
+               .type = QUIRK_COMPOSITE,
+               .data = (const struct snd_usb_audio_quirk[]) {
+                       {
+                               .ifnum = 0,
+                               .type = QUIRK_IGNORE_INTERFACE,
+                       },
+                       {
+                               .ifnum = 1,
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE,
+                       },
+                       {
+                               .ifnum = 2,
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE,
+                       },
+                       {
+                               .ifnum = -1
+                       }
+               }
+       }
+},
 
 #undef USB_DEVICE_VENDOR_SPEC
 #undef USB_AUDIO_DEVICE
diff --git a/tools/arch/arm64/include/uapi/asm/bitsperlong.h b/tools/arch/arm64/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..485d60b
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITSPERLONG_H
+#define __ASM_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_BITSPERLONG_H */
diff --git a/tools/arch/riscv/include/uapi/asm/bitsperlong.h b/tools/arch/riscv/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..0b9b58b
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Regents of the University of California
+ */
+
+#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H
+#define _UAPI_ASM_RISCV_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */
index cb8ca46..1f6d904 100644 (file)
@@ -14,7 +14,7 @@
  * Defines x86 CPU feature bits
  */
 #define NCAPINTS                       21         /* N 32-bit words worth of info */
-#define NBUGINTS                       1          /* N 32-bit bug flags */
+#define NBUGINTS                       2          /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
index 3aedae6..a00a53e 100644 (file)
 #define MSR_AMD64_DE_CFG               0xc0011029
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT   1
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE      BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
 
 #define MSR_AMD64_BU_CFG2              0xc001102a
 #define MSR_AMD64_IBSFETCHCTL          0xc0011030
index a0f4cab..b2c2946 100644 (file)
@@ -40,7 +40,8 @@ $(OUTPUT)counter_example: $(COUNTER_EXAMPLE)
 clean:
        rm -f $(ALL_PROGRAMS)
        rm -rf $(OUTPUT)include/linux/counter.h
-       rmdir -p $(OUTPUT)include/linux
+       rm -df $(OUTPUT)include/linux
+       rm -df $(OUTPUT)include
        find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
 
 install: $(ALL_PROGRAMS)
index 2e1caab..c0f25d0 100644 (file)
@@ -826,3 +826,9 @@ bool arch_is_rethunk(struct symbol *sym)
 {
        return !strcmp(sym->name, "__x86_return_thunk");
 }
+
+bool arch_is_embedded_insn(struct symbol *sym)
+{
+       return !strcmp(sym->name, "retbleed_return_thunk") ||
+              !strcmp(sym->name, "srso_safe_ret");
+}
index 8936a05..1384090 100644 (file)
@@ -389,7 +389,7 @@ static int decode_instructions(struct objtool_file *file)
                if (!strcmp(sec->name, ".noinstr.text") ||
                    !strcmp(sec->name, ".entry.text") ||
                    !strcmp(sec->name, ".cpuidle.text") ||
-                   !strncmp(sec->name, ".text.__x86.", 12))
+                   !strncmp(sec->name, ".text..__x86.", 13))
                        sec->noinstr = true;
 
                /*
@@ -455,7 +455,7 @@ static int decode_instructions(struct objtool_file *file)
                                return -1;
                        }
 
-                       if (func->return_thunk || func->alias != func)
+                       if (func->embedded_insn || func->alias != func)
                                continue;
 
                        if (!find_insn(file, sec, func->offset)) {
@@ -1288,16 +1288,33 @@ static int add_ignore_alternatives(struct objtool_file *file)
        return 0;
 }
 
+/*
+ * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol
+ * will be added to the .retpoline_sites section.
+ */
 __weak bool arch_is_retpoline(struct symbol *sym)
 {
        return false;
 }
 
+/*
+ * Symbols that replace INSN_RETURN, every (tail) call to such a symbol
+ * will be added to the .return_sites section.
+ */
 __weak bool arch_is_rethunk(struct symbol *sym)
 {
        return false;
 }
 
+/*
+ * Symbols that are embedded inside other instructions, because sometimes crazy
+ * code exists. These are mostly ignored for validation purposes.
+ */
+__weak bool arch_is_embedded_insn(struct symbol *sym)
+{
+       return false;
+}
+
 static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
 {
        struct reloc *reloc;
@@ -1576,14 +1593,14 @@ static int add_jump_destinations(struct objtool_file *file)
                        struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
 
                        /*
-                        * This is a special case for zen_untrain_ret().
+                        * This is a special case for retbleed_untrain_ret().
                         * It jumps to __x86_return_thunk(), but objtool
                         * can't find the thunk's starting RET
                         * instruction, because the RET is also in the
                         * middle of another instruction.  Objtool only
                         * knows about the outer instruction.
                         */
-                       if (sym && sym->return_thunk) {
+                       if (sym && sym->embedded_insn) {
                                add_return_call(file, insn, false);
                                continue;
                        }
@@ -2502,6 +2519,9 @@ static int classify_symbols(struct objtool_file *file)
                if (arch_is_rethunk(func))
                        func->return_thunk = true;
 
+               if (arch_is_embedded_insn(func))
+                       func->embedded_insn = true;
+
                if (arch_ftrace_match(func->name))
                        func->fentry = true;
 
@@ -2630,12 +2650,17 @@ static int decode_sections(struct objtool_file *file)
        return 0;
 }
 
-static bool is_fentry_call(struct instruction *insn)
+static bool is_special_call(struct instruction *insn)
 {
-       if (insn->type == INSN_CALL &&
-           insn_call_dest(insn) &&
-           insn_call_dest(insn)->fentry)
-               return true;
+       if (insn->type == INSN_CALL) {
+               struct symbol *dest = insn_call_dest(insn);
+
+               if (!dest)
+                       return false;
+
+               if (dest->fentry || dest->embedded_insn)
+                       return true;
+       }
 
        return false;
 }
@@ -3636,7 +3661,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                        if (ret)
                                return ret;
 
-                       if (opts.stackval && func && !is_fentry_call(insn) &&
+                       if (opts.stackval && func && !is_special_call(insn) &&
                            !has_valid_stack_frame(&state)) {
                                WARN_INSN(insn, "call without frame pointer save/setup");
                                return 1;
index 2b6d2ce..0b303eb 100644 (file)
@@ -90,6 +90,7 @@ int arch_decode_hint_reg(u8 sp_reg, int *base);
 
 bool arch_is_retpoline(struct symbol *sym);
 bool arch_is_rethunk(struct symbol *sym);
+bool arch_is_embedded_insn(struct symbol *sym);
 
 int arch_rewrite_retpolines(struct objtool_file *file);
 
index c532d70..9f71e98 100644 (file)
@@ -66,6 +66,7 @@ struct symbol {
        u8 fentry            : 1;
        u8 profiling_func    : 1;
        u8 warned            : 1;
+       u8 embedded_insn     : 1;
        struct list_head pv_target;
        struct reloc *relocs;
 };
index 4e62843..f4cb41e 100644 (file)
@@ -45,7 +45,6 @@
 
 static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
                                     struct thread *th, bool lock);
-static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms, u64 ip);
 
 static struct dso *machine__kernel_dso(struct machine *machine)
 {
@@ -2385,10 +2384,6 @@ static int add_callchain_ip(struct thread *thread,
        ms.maps = maps__get(al.maps);
        ms.map = map__get(al.map);
        ms.sym = al.sym;
-
-       if (!branch && append_inlines(cursor, &ms, ip) == 0)
-               goto out;
-
        srcline = callchain_srcline(&ms, al.addr);
        err = callchain_cursor_append(cursor, ip, &ms,
                                      branch, flags, nr_loop_iter,
index 7329b33..d45d5dc 100644 (file)
@@ -931,6 +931,11 @@ static bool should_skip_zero_counter(struct perf_stat_config *config,
         */
        if (config->aggr_mode == AGGR_THREAD && config->system_wide)
                return true;
+
+       /* Tool events have the software PMU but are only gathered on 1. */
+       if (evsel__is_tool(counter))
+               return true;
+
        /*
         * Skip value 0 when it's an uncore event and the given aggr id
         * does not belong to the PMU cpumask.
index 374d142..c6a0a27 100644 (file)
@@ -1038,9 +1038,7 @@ static int thread_stack__trace_end(struct thread_stack *ts,
 
 static bool is_x86_retpoline(const char *name)
 {
-       const char *p = strstr(name, "__x86_indirect_thunk_");
-
-       return p == name || !strcmp(name, "__indirect_thunk_start");
+       return strstr(name, "__x86_indirect_thunk_") == name;
 }
 
 /*
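
The simplified check relies on a common C idiom: strstr(name, prefix) returns name itself exactly when the prefix matches at offset zero, which makes it a compact "starts with" test. Sketched generically (the symbol names below are only examples):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* true when 'name' starts with 'prefix' */
    static bool has_prefix(const char *name, const char *prefix)
    {
            /* equivalent to: strncmp(name, prefix, strlen(prefix)) == 0 */
            return strstr(name, prefix) == name;
    }

    int main(void)
    {
            printf("%d\n", has_prefix("__x86_indirect_thunk_rax", "__x86_indirect_thunk_"));
            printf("%d\n", has_prefix("not_a_thunk", "__x86_indirect_thunk_"));
            return 0;
    }
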
index a61c7bc..63f468b 100644 (file)
@@ -177,7 +177,7 @@ void regression1_test(void)
        nr_threads = 2;
        pthread_barrier_init(&worker_barrier, NULL, nr_threads);
 
-       threads = malloc(nr_threads * sizeof(pthread_t *));
+       threads = malloc(nr_threads * sizeof(*threads));
 
        for (i = 0; i < nr_threads; i++) {
                arg = i;
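
The radix-tree test fix sizes the allocation by the element the pointer refers to rather than by pthread_t *; the array stores pthread_t values, so sizeof(*threads) stays correct even if the element type changes. The usual idiom, in isolation:

    #include <pthread.h>
    #include <stdlib.h>

    int main(void)
    {
            int nr_threads = 2;
            pthread_t *threads;

            /* sizeof(*threads) tracks the element type; sizeof(pthread_t *)
             * would only be right when pthread_t happens to be pointer-sized. */
            threads = malloc(nr_threads * sizeof(*threads));
            if (!threads)
                    return 1;

            free(threads);
            return 0;
    }
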
index b4f6f3a..5674a9d 100644 (file)
@@ -869,6 +869,77 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
        xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
 }
 
+static void redir_partial(int family, int sotype, int sock_map, int parser_map)
+{
+       int s, c0, c1, p0, p1;
+       int err, n, key, value;
+       char buf[] = "abc";
+
+       key = 0;
+       value = sizeof(buf) - 1;
+       err = xbpf_map_update_elem(parser_map, &key, &value, 0);
+       if (err)
+               return;
+
+       s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+       if (s < 0)
+               goto clean_parser_map;
+
+       err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+       if (err)
+               goto close_srv;
+
+       err = add_to_sockmap(sock_map, p0, p1);
+       if (err)
+               goto close;
+
+       n = xsend(c1, buf, sizeof(buf), 0);
+       if (n < sizeof(buf))
+               FAIL("incomplete write");
+
+       n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
+       if (n != sizeof(buf) - 1)
+               FAIL("expect %zu, received %d", sizeof(buf) - 1, n);
+
+close:
+       xclose(c0);
+       xclose(p0);
+       xclose(c1);
+       xclose(p1);
+close_srv:
+       xclose(s);
+
+clean_parser_map:
+       key = 0;
+       value = 0;
+       xbpf_map_update_elem(parser_map, &key, &value, 0);
+}
+
+static void test_skb_redir_partial(struct test_sockmap_listen *skel,
+                                  struct bpf_map *inner_map, int family,
+                                  int sotype)
+{
+       int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+       int parser = bpf_program__fd(skel->progs.prog_stream_parser);
+       int parser_map = bpf_map__fd(skel->maps.parser_map);
+       int sock_map = bpf_map__fd(inner_map);
+       int err;
+
+       err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+       if (err)
+               return;
+
+       err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+       if (err)
+               goto detach;
+
+       redir_partial(family, sotype, sock_map, parser_map);
+
+       xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+       xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
 static void test_reuseport_select_listening(int family, int sotype,
                                            int sock_map, int verd_map,
                                            int reuseport_prog)
@@ -1243,6 +1314,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
        } tests[] = {
                TEST(test_skb_redir_to_connected),
                TEST(test_skb_redir_to_listening),
+               TEST(test_skb_redir_partial),
                TEST(test_msg_redir_to_connected),
                TEST(test_msg_redir_to_listening),
        };
@@ -1432,7 +1504,7 @@ static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
        if (n < 1)
                goto out;
 
-       n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT);
+       n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
        if (n < 0)
                FAIL("%s: recv() err, errno=%d", log_prefix, errno);
        if (n == 0)
index 325c9f1..464d35b 100644 (file)
@@ -28,12 +28,26 @@ struct {
        __type(value, unsigned int);
 } verdict_map SEC(".maps");
 
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, int);
+       __type(value, int);
+} parser_map SEC(".maps");
+
 bool test_sockmap = false; /* toggled by user-space */
 bool test_ingress = false; /* toggled by user-space */
 
 SEC("sk_skb/stream_parser")
 int prog_stream_parser(struct __sk_buff *skb)
 {
+       int *value;
+       __u32 key = 0;
+
+       value = bpf_map_lookup_elem(&parser_map, &key);
+       if (value && *value)
+               return *value;
+
        return skb->len;
 }
 
index 54d09b8..c037553 100644 (file)
@@ -4,10 +4,12 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <linux/kernel.h>
+#include <linux/magic.h>
 #include <linux/mman.h>
 #include <sys/mman.h>
 #include <sys/shm.h>
 #include <sys/syscall.h>
+#include <sys/vfs.h>
 #include <unistd.h>
 #include <string.h>
 #include <fcntl.h>
@@ -15,6 +17,8 @@
 
 #include "../kselftest.h"
 
+#define NR_TESTS       9
+
 static const char * const dev_files[] = {
        "/dev/zero", "/dev/null", "/dev/urandom",
        "/proc/version", "/proc"
@@ -91,19 +95,33 @@ out:
 }
 
 /*
+ * fsync() is implemented via noop_fsync() on tmpfs. This makes the fsync()
+ * test fail below, so we need to check whether the test file lives on a tmpfs.
+ */
+static bool is_on_tmpfs(int fd)
+{
+       struct statfs statfs_buf;
+
+       if (fstatfs(fd, &statfs_buf))
+               return false;
+
+       return statfs_buf.f_type == TMPFS_MAGIC;
+}
+
+/*
  * Open/create the file at filename, (optionally) write random data to it
  * (exactly num_pages), then test the cachestat syscall on this file.
  *
  * If test_fsync == true, fsync the file, then check the number of dirty
  * pages.
  */
-bool test_cachestat(const char *filename, bool write_random, bool create,
-               bool test_fsync, unsigned long num_pages, int open_flags,
-               mode_t open_mode)
+static int test_cachestat(const char *filename, bool write_random, bool create,
+                         bool test_fsync, unsigned long num_pages,
+                         int open_flags, mode_t open_mode)
 {
        size_t PS = sysconf(_SC_PAGESIZE);
        int filesize = num_pages * PS;
-       bool ret = true;
+       int ret = KSFT_PASS;
        long syscall_ret;
        struct cachestat cs;
        struct cachestat_range cs_range = { 0, filesize };
@@ -112,7 +130,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 
        if (fd == -1) {
                ksft_print_msg("Unable to create/open file.\n");
-               ret = false;
+               ret = KSFT_FAIL;
                goto out;
        } else {
                ksft_print_msg("Create/open %s\n", filename);
@@ -121,7 +139,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
        if (write_random) {
                if (!write_exactly(fd, filesize)) {
                        ksft_print_msg("Unable to access urandom.\n");
-                       ret = false;
+                       ret = KSFT_FAIL;
                        goto out1;
                }
        }
@@ -132,7 +150,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 
        if (syscall_ret) {
                ksft_print_msg("Cachestat returned non-zero.\n");
-               ret = false;
+               ret = KSFT_FAIL;
                goto out1;
 
        } else {
@@ -142,15 +160,17 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
                        if (cs.nr_cache + cs.nr_evicted != num_pages) {
                                ksft_print_msg(
                                        "Total number of cached and evicted pages is off.\n");
-                               ret = false;
+                               ret = KSFT_FAIL;
                        }
                }
        }
 
        if (test_fsync) {
-               if (fsync(fd)) {
+               if (is_on_tmpfs(fd)) {
+                       ret = KSFT_SKIP;
+               } else if (fsync(fd)) {
                        ksft_print_msg("fsync fails.\n");
-                       ret = false;
+                       ret = KSFT_FAIL;
                } else {
                        syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
 
@@ -161,13 +181,13 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
                                print_cachestat(&cs);
 
                                if (cs.nr_dirty) {
-                                       ret = false;
+                                       ret = KSFT_FAIL;
                                        ksft_print_msg(
                                                "Number of dirty should be zero after fsync.\n");
                                }
                        } else {
                                ksft_print_msg("Cachestat (after fsync) returned non-zero.\n");
-                               ret = false;
+                               ret = KSFT_FAIL;
                                goto out1;
                        }
                }
@@ -236,13 +256,29 @@ out:
 
 int main(void)
 {
-       int ret = 0;
+       int ret;
+
+       ksft_print_header();
+
+       ret = syscall(__NR_cachestat, -1, NULL, NULL, 0);
+       if (ret == -1 && errno == ENOSYS)
+               ksft_exit_skip("cachestat syscall not available\n");
+
+       ksft_set_plan(NR_TESTS);
+
+       if (ret == -1 && errno == EBADF) {
+               ksft_test_result_pass("bad file descriptor recognized\n");
+               ret = 0;
+       } else {
+               ksft_test_result_fail("bad file descriptor ignored\n");
+               ret = 1;
+       }
 
        for (int i = 0; i < 5; i++) {
                const char *dev_filename = dev_files[i];
 
                if (test_cachestat(dev_filename, false, false, false,
-                       4, O_RDONLY, 0400))
+                       4, O_RDONLY, 0400) == KSFT_PASS)
                        ksft_test_result_pass("cachestat works with %s\n", dev_filename);
                else {
                        ksft_test_result_fail("cachestat fails with %s\n", dev_filename);
@@ -251,13 +287,27 @@ int main(void)
        }
 
        if (test_cachestat("tmpfilecachestat", true, true,
-               true, 4, O_CREAT | O_RDWR, 0400 | 0600))
+               false, 4, O_CREAT | O_RDWR, 0600) == KSFT_PASS)
                ksft_test_result_pass("cachestat works with a normal file\n");
        else {
                ksft_test_result_fail("cachestat fails with normal file\n");
                ret = 1;
        }
 
+       switch (test_cachestat("tmpfilecachestat", true, true,
+               true, 4, O_CREAT | O_RDWR, 0600)) {
+       case KSFT_FAIL:
+               ksft_test_result_fail("cachestat fsync fails with normal file\n");
+               ret = KSFT_FAIL;
+               break;
+       case KSFT_PASS:
+               ksft_test_result_pass("cachestat fsync works with a normal file\n");
+               break;
+       case KSFT_SKIP:
+               ksft_test_result_skip("tmpfilecachestat is on tmpfs\n");
+               break;
+       }
+
        if (test_cachestat_shmem())
                ksft_test_result_pass("cachestat works with a shmem file\n");
        else {
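
As the selftest shows, cachestat() is called through syscall() with a file descriptor, a byte range, an output struct, and a flags word that must be zero. A hedged, minimal user-space sketch of that call, using only the names visible in the test (the file path is just an example; __NR_cachestat needs recent kernel headers):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/mman.h>  /* struct cachestat, struct cachestat_range */

    int main(void)
    {
            struct cachestat_range range = { 0, 4096 };
            struct cachestat cs;
            int fd = open("/etc/hostname", O_RDONLY);

            if (fd < 0)
                    return 1;

            /* a nonzero return with errno == ENOSYS means no cachestat support */
            if (syscall(__NR_cachestat, fd, &range, &cs, 0))
                    perror("cachestat");
            else
                    printf("cached=%llu dirty=%llu evicted=%llu\n",
                           (unsigned long long)cs.nr_cache,
                           (unsigned long long)cs.nr_dirty,
                           (unsigned long long)cs.nr_evicted);

            close(fd);
            return 0;
    }
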
index 258ddc5..ed2e50b 100644 (file)
@@ -70,12 +70,16 @@ static int test_kmem_basic(const char *root)
                goto cleanup;
 
        cg_write(cg, "memory.high", "1M");
+
+       /* wait for RCU freeing */
+       sleep(1);
+
        slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
-       if (slab1 <= 0)
+       if (slab1 < 0)
                goto cleanup;
 
        current = cg_read_long(cg, "memory.current");
-       if (current <= 0)
+       if (current < 0)
                goto cleanup;
 
        if (slab1 < slab0 / 2 && current < slab0 / 2)
index 03f92d7..8a72bb7 100644 (file)
@@ -9,10 +9,12 @@ TEST_PROGS := \
        mode-1-recovery-updelay.sh \
        mode-2-recovery-updelay.sh \
        bond_options.sh \
-       bond-eth-type-change.sh
+       bond-eth-type-change.sh \
+       bond_macvlan.sh
 
 TEST_FILES := \
        lag_lib.sh \
+       bond_topo_2d1c.sh \
        bond_topo_3d1c.sh \
        net_forwarding_lib.sh
 
index 47ab905..6358df5 100755 (executable)
@@ -57,8 +57,8 @@ ip link add name veth2-bond type veth peer name veth2-end
 
 # add ports
 ip link set fbond master fab-br0
-ip link set veth1-bond down master fbond
-ip link set veth2-bond down master fbond
+ip link set veth1-bond master fbond
+ip link set veth2-bond master fbond
 
 # bring up
 ip link set veth1-end up
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
new file mode 100755 (executable)
index 0000000..b609fb6
--- /dev/null
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan over balance-alb
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+m1_ns="m1-$(mktemp -u XXXXXX)"
+m2_ns="m2-$(mktemp -u XXXXXX)"
+m1_ip4="192.0.2.11"
+m1_ip6="2001:db8::11"
+m2_ip4="192.0.2.12"
+m2_ip6="2001:db8::12"
+
+cleanup()
+{
+       ip -n ${m1_ns} link del macv0
+       ip netns del ${m1_ns}
+       ip -n ${m2_ns} link del macv0
+       ip netns del ${m2_ns}
+
+       client_destroy
+       server_destroy
+       gateway_destroy
+}
+
+check_connection()
+{
+       local ns=${1}
+       local target=${2}
+       local message=${3:-"macvlan_over_bond"}
+       RET=0
+
+       ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+       check_err $? "ping failed"
+       log_test "$mode: $message"
+}
+
+macvlan_over_bond()
+{
+       local param="$1"
+       RET=0
+
+       # setup new bond mode
+       bond_reset "${param}"
+
+       ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+       ip -n ${s_ns} link set macv0 netns ${m1_ns}
+       ip -n ${m1_ns} link set dev macv0 up
+       ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0
+       ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0
+
+       ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+       ip -n ${s_ns} link set macv0 netns ${m2_ns}
+       ip -n ${m2_ns} link set dev macv0 up
+       ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0
+       ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0
+
+       sleep 2
+
+       check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+       check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+       check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1"
+       check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1"
+       check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2"
+       check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2"
+       check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2"
+       check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2"
+
+       sleep 5
+
+       check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+       check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+       check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client"
+       check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client"
+       check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client"
+       check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client"
+       check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_1"
+       check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_1"
+
+       ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${m1_ns}
+ip netns add ${m2_ns}
+
+modes="active-backup balance-tlb balance-alb"
+
+for mode in $modes; do
+       macvlan_over_bond "mode $mode"
+done
+
+exit $EXIT_STATUS
index 607ba5c..c54d169 100755 (executable)
@@ -9,10 +9,7 @@ ALL_TESTS="
        num_grat_arp
 "
 
-REQUIRE_MZ=no
-NUM_NETIFS=0
 lib_dir=$(dirname "$0")
-source ${lib_dir}/net_forwarding_lib.sh
 source ${lib_dir}/bond_topo_3d1c.sh
 
 skip_prio()
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
new file mode 100644 (file)
index 0000000..a509ef9
--- /dev/null
@@ -0,0 +1,158 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+#  +-------------------------+
+#  |          bond0          |  Server
+#  |            +            |  192.0.2.1/24
+#  |      eth0  |  eth1      |  2001:db8::1/24
+#  |        +---+---+        |
+#  |        |       |        |
+#  +-------------------------+
+#           |       |
+#  +-------------------------+
+#  |        |       |        |
+#  |    +---+-------+---+    |  Gateway
+#  |    |      br0      |    |  192.0.2.254/24
+#  |    +-------+-------+    |  2001:db8::254/24
+#  |            |            |
+#  +-------------------------+
+#               |
+#  +-------------------------+
+#  |            |            |  Client
+#  |            +            |  192.0.2.10/24
+#  |          eth0           |  2001:db8::10/24
+#  +-------------------------+
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source ${lib_dir}/net_forwarding_lib.sh
+
+s_ns="s-$(mktemp -u XXXXXX)"
+c_ns="c-$(mktemp -u XXXXXX)"
+g_ns="g-$(mktemp -u XXXXXX)"
+s_ip4="192.0.2.1"
+c_ip4="192.0.2.10"
+g_ip4="192.0.2.254"
+s_ip6="2001:db8::1"
+c_ip6="2001:db8::10"
+g_ip6="2001:db8::254"
+
+gateway_create()
+{
+       ip netns add ${g_ns}
+       ip -n ${g_ns} link add br0 type bridge
+       ip -n ${g_ns} link set br0 up
+       ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
+       ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
+}
+
+gateway_destroy()
+{
+       ip -n ${g_ns} link del br0
+       ip netns del ${g_ns}
+}
+
+server_create()
+{
+       ip netns add ${s_ns}
+       ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
+
+       for i in $(seq 0 1); do
+               ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+
+               ip -n ${g_ns} link set s${i} up
+               ip -n ${g_ns} link set s${i} master br0
+               ip -n ${s_ns} link set eth${i} master bond0
+
+               tc -n ${g_ns} qdisc add dev s${i} clsact
+       done
+
+       ip -n ${s_ns} link set bond0 up
+       ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+       ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+       sleep 2
+}
+
+# Reset bond with new mode and options
+bond_reset()
+{
+       # Count the eth link number in real-time as this function
+       # maybe called from other topologies.
+       # may be called from other topologies.
+       local param="$1"
+       link_num=$((link_num -1))
+
+       ip -n ${s_ns} link set bond0 down
+       ip -n ${s_ns} link del bond0
+
+       ip -n ${s_ns} link add bond0 type bond $param
+       for i in $(seq 0 ${link_num}); do
+               ip -n ${s_ns} link set eth$i master bond0
+       done
+
+       ip -n ${s_ns} link set bond0 up
+       ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+       ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+       sleep 2
+}
+
+server_destroy()
+{
+       # Count the eth link number in real-time as this function
+       # may be called from other topologies.
+       local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+       link_num=$((link_num -1))
+       for i in $(seq 0 ${link_num}); do
+               ip -n ${s_ns} link del eth${i}
+       done
+       ip netns del ${s_ns}
+}
+
+client_create()
+{
+       ip netns add ${c_ns}
+       ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
+
+       ip -n ${g_ns} link set c0 up
+       ip -n ${g_ns} link set c0 master br0
+
+       ip -n ${c_ns} link set eth0 up
+       ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
+       ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
+}
+
+client_destroy()
+{
+       ip -n ${c_ns} link del eth0
+       ip netns del ${c_ns}
+}
+
+setup_prepare()
+{
+       gateway_create
+       server_create
+       client_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       client_destroy
+       server_destroy
+       gateway_destroy
+}
+
+bond_check_connection()
+{
+       local msg=${1:-"check connection"}
+
+       sleep 2
+       ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
+       check_err $? "${msg}: ping failed"
+       ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
+       check_err $? "${msg}: ping6 failed"
+}
index 69ab99a..3a1333d 100644 (file)
 #  |                eth0                 |  2001:db8::10/24
 #  +-------------------------------------+
 
-s_ns="s-$(mktemp -u XXXXXX)"
-c_ns="c-$(mktemp -u XXXXXX)"
-g_ns="g-$(mktemp -u XXXXXX)"
-s_ip4="192.0.2.1"
-c_ip4="192.0.2.10"
-g_ip4="192.0.2.254"
-s_ip6="2001:db8::1"
-c_ip6="2001:db8::10"
-g_ip6="2001:db8::254"
-
-gateway_create()
-{
-       ip netns add ${g_ns}
-       ip -n ${g_ns} link add br0 type bridge
-       ip -n ${g_ns} link set br0 up
-       ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
-       ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
-}
-
-gateway_destroy()
-{
-       ip -n ${g_ns} link del br0
-       ip netns del ${g_ns}
-}
-
-server_create()
-{
-       ip netns add ${s_ns}
-       ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
-
-       for i in $(seq 0 2); do
-               ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
-
-               ip -n ${g_ns} link set s${i} up
-               ip -n ${g_ns} link set s${i} master br0
-               ip -n ${s_ns} link set eth${i} master bond0
-
-               tc -n ${g_ns} qdisc add dev s${i} clsact
-       done
-
-       ip -n ${s_ns} link set bond0 up
-       ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
-       ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
-       sleep 2
-}
-
-# Reset bond with new mode and options
-bond_reset()
-{
-       local param="$1"
-
-       ip -n ${s_ns} link set bond0 down
-       ip -n ${s_ns} link del bond0
-
-       ip -n ${s_ns} link add bond0 type bond $param
-       for i in $(seq 0 2); do
-               ip -n ${s_ns} link set eth$i master bond0
-       done
-
-       ip -n ${s_ns} link set bond0 up
-       ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
-       ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
-       sleep 2
-}
-
-server_destroy()
-{
-       for i in $(seq 0 2); do
-               ip -n ${s_ns} link del eth${i}
-       done
-       ip netns del ${s_ns}
-}
-
-client_create()
-{
-       ip netns add ${c_ns}
-       ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
-
-       ip -n ${g_ns} link set c0 up
-       ip -n ${g_ns} link set c0 master br0
-
-       ip -n ${c_ns} link set eth0 up
-       ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
-       ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
-}
-
-client_destroy()
-{
-       ip -n ${c_ns} link del eth0
-       ip netns del ${c_ns}
-}
+source bond_topo_2d1c.sh
 
 setup_prepare()
 {
        gateway_create
        server_create
        client_create
-}
-
-cleanup()
-{
-       pre_cleanup
-
-       client_destroy
-       server_destroy
-       gateway_destroy
-}
-
-bond_check_connection()
-{
-       local msg=${1:-"check connection"}
 
-       sleep 2
-       ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
-       check_err $? "${msg}: ping failed"
-       ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
-       check_err $? "${msg}: ping6 failed"
+       # Add the extra device as we use 3 down links for bond0
+       local i=2
+       ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+       ip -n ${g_ns} link set s${i} up
+       ip -n ${g_ns} link set s${i} master br0
+       ip -n ${s_ns} link set eth${i} master bond0
+       tc -n ${g_ns} qdisc add dev s${i} clsact
 }
index 7d9e73a..0c47faf 100755 (executable)
@@ -98,12 +98,12 @@ sb_occ_etc_check()
 
 port_pool_test()
 {
-       local exp_max_occ=288
+       local exp_max_occ=$(devlink_cell_size_get)
        local max_occ
 
        devlink sb occupancy clearmax $DEVLINK_DEV
 
-       $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+       $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
                -t ip -q
 
        devlink sb occupancy snapshot $DEVLINK_DEV
@@ -126,12 +126,12 @@ port_pool_test()
 
 port_tc_ip_test()
 {
-       local exp_max_occ=288
+       local exp_max_occ=$(devlink_cell_size_get)
        local max_occ
 
        devlink sb occupancy clearmax $DEVLINK_DEV
 
-       $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+       $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
                -t ip -q
 
        devlink sb occupancy snapshot $DEVLINK_DEV
@@ -154,16 +154,12 @@ port_tc_ip_test()
 
 port_tc_arp_test()
 {
-       local exp_max_occ=96
+       local exp_max_occ=$(devlink_cell_size_get)
        local max_occ
 
-       if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
-               exp_max_occ=144
-       fi
-
        devlink sb occupancy clearmax $DEVLINK_DEV
 
-       $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+       $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q
 
        devlink sb occupancy snapshot $DEVLINK_DEV
 
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
new file mode 100644 (file)
index 0000000..63b76cf
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Snapshot and tracing_cpumask
+# requires: trace_marker tracing_cpumask snapshot
+# flags: instance
+
+# This testcase is contrived to reproduce a problem where the cpu buffers
+# become unavailable, which is due to 'record_disabled' of array_buffer and
+# max_buffer being messed up.
+
+# Store the original cpumask
+ORIG_CPUMASK=`cat tracing_cpumask`
+
+# Stop tracing on all cpus
+echo 0 > tracing_cpumask
+
+# Take a snapshot of the main buffer
+echo 1 > snapshot
+
+# Restore the original cpumask; note that some cpus should now be traced
+echo ${ORIG_CPUMASK} > tracing_cpumask
+
+# Set tracing on
+echo 1 > tracing_on
+
+# Write a log into buffer
+echo "test input 1" > trace_marker
+
+# Ensure the log was written, i.e. the cpu buffers are still available
+grep -q "test input 1" trace
+exit 0
index 72ba48f..a3bb36f 100644 (file)
@@ -23,6 +23,7 @@ LIBKVM += lib/guest_modes.c
 LIBKVM += lib/io.c
 LIBKVM += lib/kvm_util.c
 LIBKVM += lib/memstress.c
+LIBKVM += lib/guest_sprintf.c
 LIBKVM += lib/rbtree.c
 LIBKVM += lib/sparsebit.c
 LIBKVM += lib/test_util.c
@@ -122,6 +123,7 @@ TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += guest_print_test
 TEST_GEN_PROGS_x86_64 += hardware_disable_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += kvm_page_table_test
@@ -151,6 +153,7 @@ TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
+TEST_GEN_PROGS_aarch64 += guest_print_test
 TEST_GEN_PROGS_aarch64 += get-reg-list
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_aarch64 += kvm_page_table_test
@@ -166,8 +169,10 @@ TEST_GEN_PROGS_s390x += s390x/resets
 TEST_GEN_PROGS_s390x += s390x/sync_regs_test
 TEST_GEN_PROGS_s390x += s390x/tprot
 TEST_GEN_PROGS_s390x += s390x/cmma_test
+TEST_GEN_PROGS_s390x += s390x/debug_test
 TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
+TEST_GEN_PROGS_s390x += guest_print_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
 TEST_GEN_PROGS_s390x += kvm_page_table_test
 TEST_GEN_PROGS_s390x += rseq_test
@@ -176,6 +181,7 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test
 
 TEST_GEN_PROGS_riscv += demand_paging_test
 TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += guest_print_test
 TEST_GEN_PROGS_riscv += get-reg-list
 TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
 TEST_GEN_PROGS_riscv += kvm_page_table_test
@@ -207,6 +213,7 @@ endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -Wno-gnu-variable-sized-type-not-at-end -MD\
        -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
+       -fno-builtin-strnlen \
        -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
        -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
        -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
index 4951ac5..b905808 100644 (file)
@@ -98,7 +98,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
                uint64_t val;
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
 
                /*
                 * Expect the ioctl to succeed with no effect on the register
@@ -107,7 +107,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
                vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
        }
 }
 
@@ -127,14 +127,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
                uint64_t val;
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
 
                r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
                TEST_ASSERT(r < 0 && errno == EINVAL,
                            "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
        }
 }
 
index 8ef3709..274b846 100644 (file)
@@ -19,7 +19,6 @@
  *
  * Copyright (c) 2021, Google LLC.
  */
-
 #define _GNU_SOURCE
 
 #include <stdlib.h>
@@ -155,11 +154,13 @@ static void guest_validate_irq(unsigned int intid,
        xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
 
        /* Make sure we are dealing with the correct timer IRQ */
-       GUEST_ASSERT_2(intid == timer_irq, intid, timer_irq);
+       GUEST_ASSERT_EQ(intid, timer_irq);
 
        /* Basic 'timer condition met' check */
-       GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us);
-       GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl);
+       __GUEST_ASSERT(xcnt >= cval,
+                      "xcnt = 0x%llx, cval = 0x%llx, xcnt_diff_us = 0x%llx",
+                      xcnt, cval, xcnt_diff_us);
+       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xcnt = 0x%llx", xcnt);
 
        WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
 }
@@ -192,8 +193,7 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
                        TIMER_TEST_ERR_MARGIN_US);
 
                irq_iter = READ_ONCE(shared_data->nr_iter);
-               GUEST_ASSERT_2(config_iter + 1 == irq_iter,
-                               config_iter + 1, irq_iter);
+               GUEST_ASSERT_EQ(config_iter + 1, irq_iter);
        }
 }
 
@@ -243,13 +243,9 @@ static void *test_vcpu_run(void *arg)
                break;
        case UCALL_ABORT:
                sync_global_from_guest(vm, *shared_data);
-               REPORT_GUEST_ASSERT_N(uc, "values: %lu, %lu; %lu, vcpu %u; stage; %u; iter: %u",
-                                     GUEST_ASSERT_ARG(uc, 0),
-                                     GUEST_ASSERT_ARG(uc, 1),
-                                     GUEST_ASSERT_ARG(uc, 2),
-                                     vcpu_idx,
-                                     shared_data->guest_stage,
-                                     shared_data->nr_iter);
+               fprintf(stderr, "Guest assert failed, vcpu %u; stage: %u; iter: %u\n",
+                       vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
+               REPORT_GUEST_ASSERT(uc);
                break;
        default:
                TEST_FAIL("Unexpected guest exit\n");
index 637be79..f5b6cb3 100644 (file)
@@ -365,7 +365,7 @@ static void guest_wp_handler(struct ex_regs *regs)
 
 static void guest_ss_handler(struct ex_regs *regs)
 {
-       GUEST_ASSERT_1(ss_idx < 4, ss_idx);
+       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%u'", ss_idx);
        ss_addr[ss_idx++] = regs->pc;
        regs->pstate |= SPSR_SS;
 }
@@ -410,8 +410,8 @@ static void guest_code_ss(int test_cnt)
                /* Userspace disables Single Step when the end is nigh. */
                asm volatile("iter_ss_end:\n");
 
-               GUEST_ASSERT(bvr == w_bvr);
-               GUEST_ASSERT(wvr == w_wvr);
+               GUEST_ASSERT_EQ(bvr, w_bvr);
+               GUEST_ASSERT_EQ(wvr, w_wvr);
        }
        GUEST_DONE();
 }
@@ -450,7 +450,7 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp
        vcpu_run(vcpu);
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+               REPORT_GUEST_ASSERT(uc);
                break;
        case UCALL_DONE:
                goto done;
index bef1499..31f66ba 100644 (file)
@@ -8,7 +8,6 @@
  * hypercalls are properly masked or unmasked to the guest when disabled or
  * enabled from the KVM userspace, respectively.
  */
-
 #include <errno.h>
 #include <linux/arm-smccc.h>
 #include <asm/kvm.h>
@@ -105,15 +104,17 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info)
                switch (stage) {
                case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
                case TEST_STAGE_HVC_IFACE_FALSE_INFO:
-                       GUEST_ASSERT_3(res.a0 == SMCCC_RET_NOT_SUPPORTED,
-                                       res.a0, hc_info->func_id, hc_info->arg1);
+                       __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
                        break;
                case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
-                       GUEST_ASSERT_3(res.a0 != SMCCC_RET_NOT_SUPPORTED,
-                                       res.a0, hc_info->func_id, hc_info->arg1);
+                       __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
                        break;
                default:
-                       GUEST_ASSERT_1(0, stage);
+                       GUEST_FAIL("Unexpected stage = %u", stage);
                }
        }
 }
@@ -132,7 +133,7 @@ static void guest_code(void)
                        guest_test_hvc(false_hvc_info);
                        break;
                default:
-                       GUEST_ASSERT_1(0, stage);
+                       GUEST_FAIL("Unexpected stage = %u", stage);
                }
 
                GUEST_SYNC(stage);
@@ -290,10 +291,7 @@ static void test_run(void)
                        guest_done = true;
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_N(uc, "values: 0x%lx, 0x%lx; 0x%lx, stage: %u",
-                                             GUEST_ASSERT_ARG(uc, 0),
-                                             GUEST_ASSERT_ARG(uc, 1),
-                                             GUEST_ASSERT_ARG(uc, 2), stage);
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                default:
                        TEST_FAIL("Unexpected guest exit\n");
index df10f1f..47bb914 100644 (file)
@@ -7,7 +7,6 @@
  * hugetlbfs with a hole). It checks that the expected handling method is
  * called (e.g., uffd faults with the right address and write/read flag).
  */
-
 #define _GNU_SOURCE
 #include <linux/bitmap.h>
 #include <fcntl.h>
@@ -293,12 +292,12 @@ static void guest_code(struct test_desc *test)
 
 static void no_dabt_handler(struct ex_regs *regs)
 {
-       GUEST_ASSERT_1(false, read_sysreg(far_el1));
+       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%llx", read_sysreg(far_el1));
 }
 
 static void no_iabt_handler(struct ex_regs *regs)
 {
-       GUEST_ASSERT_1(false, regs->pc);
+       GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
 }
 
 static struct uffd_args {
@@ -318,7 +317,7 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
 
        TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
                    "The only expected UFFD mode is MISSING");
-       ASSERT_EQ(addr, (uint64_t)args->hva);
+       TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
 
        pr_debug("uffd fault: addr=%p write=%d\n",
                 (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
@@ -432,7 +431,7 @@ static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
        region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
        hva = (void *)region->region.userspace_addr;
 
-       ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+       TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
 
        memcpy(hva, run->mmio.data, run->mmio.len);
        events.mmio_exits += 1;
@@ -631,9 +630,9 @@ static void setup_default_handlers(struct test_desc *test)
 
 static void check_event_counts(struct test_desc *test)
 {
-       ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
-       ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
-       ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
+       TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+       TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+       TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
 }
 
 static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
@@ -679,7 +678,7 @@ static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
                        }
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
index 90d854e..2e64b48 100644 (file)
@@ -7,7 +7,6 @@
  * host to inject a specific intid via a GUEST_SYNC call, and then checks that
  * it received it.
  */
-
 #include <asm/kvm.h>
 #include <asm/kvm_para.h>
 #include <sys/eventfd.h>
@@ -781,7 +780,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
                        run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
new file mode 100644 (file)
index 0000000..41230b7
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test for GUEST_PRINTF
+ *
+ * Copyright 2022, Google, Inc. and/or its affiliates.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct guest_vals {
+       uint64_t a;
+       uint64_t b;
+       uint64_t type;
+};
+
+static struct guest_vals vals;
+
+/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */
+#define TYPE_LIST                                      \
+TYPE(test_type_i64,  I64,  "%ld",   int64_t)           \
+TYPE(test_type_u64,  U64u, "%lu",   uint64_t)          \
+TYPE(test_type_x64,  U64x, "0x%lx", uint64_t)          \
+TYPE(test_type_X64,  U64X, "0x%lX", uint64_t)          \
+TYPE(test_type_u32,  U32u, "%u",    uint32_t)          \
+TYPE(test_type_x32,  U32x, "0x%x",  uint32_t)          \
+TYPE(test_type_X32,  U32X, "0x%X",  uint32_t)          \
+TYPE(test_type_int,  INT,  "%d",    int)               \
+TYPE(test_type_char, CHAR, "%c",    char)              \
+TYPE(test_type_str,  STR,  "'%s'",  const char *)      \
+TYPE(test_type_ptr,  PTR,  "%p",    uintptr_t)
+
+enum args_type {
+#define TYPE(fn, ext, fmt_t, T) TYPE_##ext,
+       TYPE_LIST
+#undef TYPE
+};
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+                    const char *expected_assert);
+
+#define BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T)                    \
+const char *PRINTF_FMT_##ext = "Got params a = " fmt_t " and b = " fmt_t;    \
+const char *ASSERT_FMT_##ext = "Expected " fmt_t ", got " fmt_t " instead";  \
+static void fn(struct kvm_vcpu *vcpu, T a, T b)                                     \
+{                                                                           \
+       char expected_printf[UCALL_BUFFER_LEN];                              \
+       char expected_assert[UCALL_BUFFER_LEN];                              \
+                                                                            \
+       snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \
+       snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \
+       vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext };  \
+       sync_global_to_guest(vcpu->vm, vals);                                \
+       run_test(vcpu, expected_printf, expected_assert);                    \
+}
+
+#define TYPE(fn, ext, fmt_t, T) \
+               BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T)
+       TYPE_LIST
+#undef TYPE
+
+static void guest_code(void)
+{
+       while (1) {
+               switch (vals.type) {
+#define TYPE(fn, ext, fmt_t, T)                                                        \
+               case TYPE_##ext:                                                \
+                       GUEST_PRINTF(PRINTF_FMT_##ext, vals.a, vals.b);         \
+                       __GUEST_ASSERT(vals.a == vals.b,                        \
+                                      ASSERT_FMT_##ext, vals.a, vals.b);       \
+                       break;
+               TYPE_LIST
+#undef TYPE
+               default:
+                       GUEST_SYNC(vals.type);
+               }
+
+               GUEST_DONE();
+       }
+}
+
+/*
+ * Unfortunately this gets a little messy because 'assert_msg' doesn't
+ * just contain the matching string; it also contains additional assert
+ * info.  Fortunately the part that matches should be at the very end of
+ * 'assert_msg'.
+ */
+static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
+{
+       int len_str = strlen(assert_msg);
+       int len_substr = strlen(expected_assert_msg);
+       int offset = len_str - len_substr;
+
+       TEST_ASSERT(len_substr <= len_str,
+                   "Expected '%s' to be a substring of '%s'\n",
+                   expected_assert_msg, assert_msg);
+
+       TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0,
+                   "Unexpected mismatch. Expected: '%s', got: '%s'",
+                   expected_assert_msg, &assert_msg[offset]);
+}
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+                    const char *expected_assert)
+{
+       struct kvm_run *run = vcpu->run;
+       struct ucall uc;
+
+       while (1) {
+               vcpu_run(vcpu);
+
+               TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+                           "Unexpected exit reason: %u (%s)\n",
+                           run->exit_reason, exit_reason_str(run->exit_reason));
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       TEST_FAIL("Unknown 'args_type' = %lu", uc.args[1]);
+                       break;
+               case UCALL_PRINTF:
+                       TEST_ASSERT(strcmp(uc.buffer, expected_printf) == 0,
+                                   "Unexpected mismatch. Expected: '%s', got: '%s'",
+                                   expected_printf, uc.buffer);
+                       break;
+               case UCALL_ABORT:
+                       ucall_abort(uc.buffer, expected_assert);
+                       break;
+               case UCALL_DONE:
+                       return;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+}
+
+static void guest_code_limits(void)
+{
+       char test_str[UCALL_BUFFER_LEN + 10];
+
+       memset(test_str, 'a', sizeof(test_str));
+       test_str[sizeof(test_str) - 1] = 0;
+
+       GUEST_PRINTF("%s", test_str);
+}
+
+static void test_limits(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits);
+       run = vcpu->run;
+       vcpu_run(vcpu);
+
+       TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+                   "Unexpected exit reason: %u (%s)\n",
+                   run->exit_reason, exit_reason_str(run->exit_reason));
+
+       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT,
+                   "Unexpected ucall command: %lu, expected: %u (UCALL_ABORT)\n",
+                   uc.cmd, UCALL_ABORT);
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       test_type_i64(vcpu, -1, -1);
+       test_type_i64(vcpu, -1,  1);
+       test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+       test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+       test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+       test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+       test_type_u32(vcpu, 0x90abcdef, 0x90abcdef);
+       test_type_u32(vcpu, 0x90abcdef, 0x90abcdee);
+       test_type_x32(vcpu, 0x90abcdef, 0x90abcdef);
+       test_type_x32(vcpu, 0x90abcdef, 0x90abcdee);
+       test_type_X32(vcpu, 0x90abcdef, 0x90abcdef);
+       test_type_X32(vcpu, 0x90abcdef, 0x90abcdee);
+
+       test_type_int(vcpu, -1, -1);
+       test_type_int(vcpu, -1,  1);
+       test_type_int(vcpu,  1,  1);
+
+       test_type_char(vcpu, 'a', 'a');
+       test_type_char(vcpu, 'a', 'A');
+       test_type_char(vcpu, 'a', 'b');
+
+       test_type_str(vcpu, "foo", "foo");
+       test_type_str(vcpu, "foo", "bar");
+
+       test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+       kvm_vm_free(vm);
+
+       test_limits();
+
+       return 0;
+}
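
For readers tracing the X-macro above: each TYPE_LIST entry generates one pair of format strings and one host-side helper. A sketch of what the unsigned 64-bit entry, TYPE(test_type_u64, U64u, "%lu", uint64_t), expands to, shown only for illustration:

    const char *PRINTF_FMT_U64u = "Got params a = %lu and b = %lu";
    const char *ASSERT_FMT_U64u = "Expected %lu, got %lu instead";

    static void test_type_u64(struct kvm_vcpu *vcpu, uint64_t a, uint64_t b)
    {
            char expected_printf[UCALL_BUFFER_LEN];
            char expected_assert[UCALL_BUFFER_LEN];

            /* Render the strings the guest is expected to produce... */
            snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_U64u, a, b);
            snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_U64u, a, b);

            /* ...push the operands and the type selector into the guest... */
            vals = (struct guest_vals){ a, b, TYPE_U64u };
            sync_global_to_guest(vcpu->vm, vals);

            /* ...then compare them against the ucall buffers the guest sends back. */
            run_test(vcpu, expected_printf, expected_assert);
    }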
index cb7c03d..b3e9752 100644 (file)
@@ -41,7 +41,7 @@ static inline uint64_t timer_get_cntct(enum arch_timer timer)
        case PHYSICAL:
                return read_sysreg(cntpct_el0);
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        /* We should not reach here */
@@ -58,7 +58,7 @@ static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
                write_sysreg(cval, cntp_cval_el0);
                break;
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        isb();
@@ -72,7 +72,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer)
        case PHYSICAL:
                return read_sysreg(cntp_cval_el0);
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        /* We should not reach here */
@@ -89,7 +89,7 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
                write_sysreg(tval, cntp_tval_el0);
                break;
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        isb();
@@ -105,7 +105,7 @@ static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
                write_sysreg(ctl, cntp_ctl_el0);
                break;
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        isb();
@@ -119,7 +119,7 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer)
        case PHYSICAL:
                return read_sysreg(cntp_ctl_el0);
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        /* We should not reach here */
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h
new file mode 100644 (file)
index 0000000..4b68f37
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
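
For context, ucall_arch_do_ucall() on arm64 is just a 64-bit store of the ucall struct's host virtual address (see ucall_arch_do_ucall((vm_vaddr_t)uc->hva) in lib/ucall_common.c below) to the magic MMIO page mapped by ucall_arch_init(); the host recognizes the resulting KVM_EXIT_MMIO and pulls the pointer back out of the exit payload. A rough sketch of that decode side, assuming the pre-existing ucall_arch_get_ucall() is unchanged by this series:

    void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
    {
            struct kvm_run *run = vcpu->run;

            /* Only a 64-bit write to the per-VM ucall MMIO page is a ucall. */
            if (run->exit_reason == KVM_EXIT_MMIO &&
                run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
                    TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
                                "Unexpected ucall exit mmio address access");
                    return (void *)(*((uint64_t *)run->mmio.data));
            }

            return NULL;
    }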
diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h
new file mode 100644 (file)
index 0000000..be46eb3
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "processor.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_RISCV_SBI
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+                 KVM_RISCV_SELFTESTS_SBI_UCALL,
+                 uc, 0, 0, 0, 0, 0);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h
new file mode 100644 (file)
index 0000000..b231bf2
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_S390_SIEIC
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+}
+
+#endif
index 7e0182f..7e614ad 100644 (file)
@@ -53,14 +53,13 @@ void test_assert(bool exp, const char *exp_str,
 #define TEST_ASSERT(e, fmt, ...) \
        test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
 
-#define ASSERT_EQ(a, b) do { \
-       typeof(a) __a = (a); \
-       typeof(b) __b = (b); \
-       TEST_ASSERT(__a == __b, \
-                   "ASSERT_EQ(%s, %s) failed.\n" \
-                   "\t%s is %#lx\n" \
-                   "\t%s is %#lx", \
-                   #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+#define TEST_ASSERT_EQ(a, b)                                           \
+do {                                                                   \
+       typeof(a) __a = (a);                                            \
+       typeof(b) __b = (b);                                            \
+       test_assert(__a == __b, #a " == " #b, __FILE__, __LINE__,       \
+                   "%#lx != %#lx (%s != %s)",                          \
+                   (unsigned long)(__a), (unsigned long)(__b), #a, #b);\
 } while (0)
 
 #define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do {               \
@@ -186,6 +185,9 @@ static inline uint32_t atoi_non_negative(const char *name, const char *num_str)
        return num;
 }
 
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args);
+int guest_snprintf(char *buf, int n, const char *fmt, ...);
+
 char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1)));
 
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
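
The rename from ASSERT_EQ to TEST_ASSERT_EQ also routes failures through test_assert(), so a mismatch is reported with the usual file/line banner plus a "%#lx != %#lx" detail line. A hypothetical call site, just to show the shape:

    /* Both macro arguments are evaluated exactly once before the comparison. */
    static void check_exit(struct kvm_vcpu *vcpu)
    {
            TEST_ASSERT_EQ(vcpu->run->exit_reason, UCALL_EXIT_REASON);
    }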
index 1a6aaef..112bc1d 100644 (file)
@@ -7,21 +7,25 @@
 #ifndef SELFTEST_KVM_UCALL_COMMON_H
 #define SELFTEST_KVM_UCALL_COMMON_H
 #include "test_util.h"
+#include "ucall.h"
 
 /* Common ucalls */
 enum {
        UCALL_NONE,
        UCALL_SYNC,
        UCALL_ABORT,
+       UCALL_PRINTF,
        UCALL_DONE,
        UCALL_UNHANDLED,
 };
 
 #define UCALL_MAX_ARGS 7
+#define UCALL_BUFFER_LEN 1024
 
 struct ucall {
        uint64_t cmd;
        uint64_t args[UCALL_MAX_ARGS];
+       char buffer[UCALL_BUFFER_LEN];
 
        /* Host virtual address of this struct. */
        struct ucall *hva;
@@ -32,8 +36,12 @@ void ucall_arch_do_ucall(vm_vaddr_t uc);
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
 
 void ucall(uint64_t cmd, int nargs, ...);
+void ucall_fmt(uint64_t cmd, const char *fmt, ...);
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+                 unsigned int line, const char *fmt, ...);
 uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
 void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
+int ucall_nr_pages_required(uint64_t page_size);
 
 /*
  * Perform userspace call without any associated data.  This bare call avoids
@@ -46,8 +54,11 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
 #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
                                ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)      ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args)
 #define GUEST_DONE()           ucall(UCALL_DONE, 0)
 
+#define REPORT_GUEST_PRINTF(ucall) pr_info("%s", (ucall).buffer)
+
 enum guest_assert_builtin_args {
        GUEST_ERROR_STRING,
        GUEST_FILE,
@@ -55,70 +66,41 @@ enum guest_assert_builtin_args {
        GUEST_ASSERT_BUILTIN_NARGS
 };
 
-#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...)         \
-do {                                                                   \
-       if (!(_condition))                                              \
-               ucall(UCALL_ABORT, GUEST_ASSERT_BUILTIN_NARGS + _nargs, \
-                     "Failed guest assert: " _condstr,                 \
-                     __FILE__, __LINE__, ##_args);                     \
+#define ____GUEST_ASSERT(_condition, _exp, _fmt, _args...)                             \
+do {                                                                                   \
+       if (!(_condition))                                                              \
+               ucall_assert(UCALL_ABORT, _exp, __FILE__, __LINE__, _fmt, ##_args);     \
 } while (0)
 
-#define GUEST_ASSERT(_condition) \
-       __GUEST_ASSERT(_condition, #_condition, 0, 0)
-
-#define GUEST_ASSERT_1(_condition, arg1) \
-       __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
-
-#define GUEST_ASSERT_2(_condition, arg1, arg2) \
-       __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
-
-#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
-       __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
-
-#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
-       __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
-
-#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+#define __GUEST_ASSERT(_condition, _fmt, _args...)                             \
+       ____GUEST_ASSERT(_condition, #_condition, _fmt, ##_args)
 
-#define __REPORT_GUEST_ASSERT(_ucall, fmt, _args...)                   \
-       TEST_FAIL("%s at %s:%ld\n" fmt,                                 \
-                 (const char *)(_ucall).args[GUEST_ERROR_STRING],      \
-                 (const char *)(_ucall).args[GUEST_FILE],              \
-                 (_ucall).args[GUEST_LINE],                            \
-                 ##_args)
+#define GUEST_ASSERT(_condition)                                               \
+       __GUEST_ASSERT(_condition, #_condition)
 
-#define GUEST_ASSERT_ARG(ucall, i) ((ucall).args[GUEST_ASSERT_BUILTIN_NARGS + i])
+#define GUEST_FAIL(_fmt, _args...)                                             \
+       ucall_assert(UCALL_ABORT, "Unconditional guest failure",                \
+                    __FILE__, __LINE__, _fmt, ##_args)
 
-#define REPORT_GUEST_ASSERT(ucall)             \
-       __REPORT_GUEST_ASSERT((ucall), "")
-
-#define REPORT_GUEST_ASSERT_1(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0))
-
-#define REPORT_GUEST_ASSERT_2(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0),     \
-                             GUEST_ASSERT_ARG((ucall), 1))
-
-#define REPORT_GUEST_ASSERT_3(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0),     \
-                             GUEST_ASSERT_ARG((ucall), 1),     \
-                             GUEST_ASSERT_ARG((ucall), 2))
+#define GUEST_ASSERT_EQ(a, b)                                                  \
+do {                                                                           \
+       typeof(a) __a = (a);                                                    \
+       typeof(b) __b = (b);                                                    \
+       ____GUEST_ASSERT(__a == __b, #a " == " #b, "%#lx != %#lx (%s != %s)",   \
+                        (unsigned long)(__a), (unsigned long)(__b), #a, #b);   \
+} while (0)
 
-#define REPORT_GUEST_ASSERT_4(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0),     \
-                             GUEST_ASSERT_ARG((ucall), 1),     \
-                             GUEST_ASSERT_ARG((ucall), 2),     \
-                             GUEST_ASSERT_ARG((ucall), 3))
+#define GUEST_ASSERT_NE(a, b)                                                  \
+do {                                                                           \
+       typeof(a) __a = (a);                                                    \
+       typeof(b) __b = (b);                                                    \
+       ____GUEST_ASSERT(__a != __b, #a " != " #b, "%#lx == %#lx (%s == %s)",   \
+                        (unsigned long)(__a), (unsigned long)(__b), #a, #b);   \
+} while (0)
 
-#define REPORT_GUEST_ASSERT_N(ucall, fmt, args...)     \
-       __REPORT_GUEST_ASSERT((ucall), fmt, ##args)
+#define REPORT_GUEST_ASSERT(ucall)                                             \
+       test_assert(false, (const char *)(ucall).args[GUEST_ERROR_STRING],      \
+                   (const char *)(ucall).args[GUEST_FILE],                     \
+                   (ucall).args[GUEST_LINE], "%s", (ucall).buffer)
 
 #endif /* SELFTEST_KVM_UCALL_COMMON_H */
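
Taken together, the reworked macros let guest code emit free-form strings while the host keeps one generic handler. A condensed sketch of typical usage under the new interface (the guest function and values are hypothetical; the macros are the ones defined above):

    static void guest_code(void)
    {
            uint64_t actual = 42, limit = 100;

            GUEST_PRINTF("observed value = %lu", actual);
            GUEST_ASSERT_EQ(actual, 42);
            __GUEST_ASSERT(actual < limit, "Expected %lu < %lu", actual, limit);
            GUEST_DONE();
    }

    static void handle_ucalls(struct kvm_vcpu *vcpu)
    {
            struct ucall uc;

            for (;;) {
                    vcpu_run(vcpu);
                    switch (get_ucall(vcpu, &uc)) {
                    case UCALL_PRINTF:
                            REPORT_GUEST_PRINTF(uc);
                            break;
                    case UCALL_ABORT:
                            REPORT_GUEST_ASSERT(uc);  /* fails the test with uc.buffer */
                            break;
                    case UCALL_DONE:
                            return;
                    default:
                            TEST_FAIL("Unexpected ucall %lu", uc.cmd);
                    }
            }
    }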
index aa434c8..4fd0421 100644 (file)
@@ -239,7 +239,12 @@ struct kvm_x86_cpu_property {
 #define X86_PROPERTY_MAX_BASIC_LEAF            KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
 #define X86_PROPERTY_PMU_VERSION               KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
 #define X86_PROPERTY_PMU_NR_GP_COUNTERS                KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
 #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_PMU_EVENTS_MASK           KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK        KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
+#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS     KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH      KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
 
 #define X86_PROPERTY_SUPPORTED_XCR0_LO         KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
 #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0      KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h
new file mode 100644 (file)
index 0000000..06b244b
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_IO
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+#endif
index b3b00be..69f26d8 100644 (file)
@@ -200,7 +200,7 @@ static void *vcpu_worker(void *data)
                if (READ_ONCE(host_quit))
                        return NULL;
 
-               clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+               clock_gettime(CLOCK_MONOTONIC, &start);
                ret = _vcpu_run(vcpu);
                ts_diff = timespec_elapsed(start);
 
@@ -367,7 +367,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        /* Test the stage of KVM creating mappings */
        *current_stage = KVM_CREATE_MAPPINGS;
 
-       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+       clock_gettime(CLOCK_MONOTONIC, &start);
        vcpus_complete_new_stage(*current_stage);
        ts_diff = timespec_elapsed(start);
 
@@ -380,7 +380,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
        *current_stage = KVM_UPDATE_MAPPINGS;
 
-       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+       clock_gettime(CLOCK_MONOTONIC, &start);
        vcpus_complete_new_stage(*current_stage);
        ts_diff = timespec_elapsed(start);
 
@@ -392,7 +392,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
        *current_stage = KVM_ADJUST_MAPPINGS;
 
-       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+       clock_gettime(CLOCK_MONOTONIC, &start);
        vcpus_complete_new_stage(*current_stage);
        ts_diff = timespec_elapsed(start);
 
index f212bd8..ddab0ce 100644 (file)
@@ -6,11 +6,7 @@
  */
 #include "kvm_util.h"
 
-/*
- * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
- * VM), it must not be accessed from host code.
- */
-static vm_vaddr_t *ucall_exit_mmio_addr;
+vm_vaddr_t *ucall_exit_mmio_addr;
 
 void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
@@ -23,11 +19,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
        write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
 }
 
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
-}
-
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
new file mode 100644 (file)
index 0000000..c4a69d8
--- /dev/null
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#define APPEND_BUFFER_SAFE(str, end, v) \
+do {                                   \
+       GUEST_ASSERT(str < end);        \
+       *str++ = (v);                   \
+} while (0)
+
+static int isdigit(int ch)
+{
+       return (ch >= '0') && (ch <= '9');
+}
+
+static int skip_atoi(const char **s)
+{
+       int i = 0;
+
+       while (isdigit(**s))
+               i = i * 10 + *((*s)++) - '0';
+       return i;
+}
+
+#define ZEROPAD        1               /* pad with zero */
+#define SIGN   2               /* unsigned/signed long */
+#define PLUS   4               /* show plus */
+#define SPACE  8               /* space if plus */
+#define LEFT   16              /* left justified */
+#define SMALL  32              /* Must be 32 == 0x20 */
+#define SPECIAL        64              /* 0x */
+
+#define __do_div(n, base)                              \
+({                                                     \
+       int __res;                                      \
+                                                       \
+       __res = ((uint64_t) n) % (uint32_t) base;       \
+       n = ((uint64_t) n) / (uint32_t) base;           \
+       __res;                                          \
+})
+
+static char *number(char *str, const char *end, long num, int base, int size,
+                   int precision, int type)
+{
+       /* we are only called with base 8, 10 or 16, thus don't need "G..." */
+       static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+       char tmp[66];
+       char c, sign, locase;
+       int i;
+
+       /*
+        * locase = 0 or 0x20. ORing digits or letters with 'locase'
+        * produces the same digits or (maybe lowercased) letters.
+        */
+       locase = (type & SMALL);
+       if (type & LEFT)
+               type &= ~ZEROPAD;
+       if (base < 2 || base > 16)
+               return NULL;
+       c = (type & ZEROPAD) ? '0' : ' ';
+       sign = 0;
+       if (type & SIGN) {
+               if (num < 0) {
+                       sign = '-';
+                       num = -num;
+                       size--;
+               } else if (type & PLUS) {
+                       sign = '+';
+                       size--;
+               } else if (type & SPACE) {
+                       sign = ' ';
+                       size--;
+               }
+       }
+       if (type & SPECIAL) {
+               if (base == 16)
+                       size -= 2;
+               else if (base == 8)
+                       size--;
+       }
+       i = 0;
+       if (num == 0)
+               tmp[i++] = '0';
+       else
+               while (num != 0)
+                       tmp[i++] = (digits[__do_div(num, base)] | locase);
+       if (i > precision)
+               precision = i;
+       size -= precision;
+       if (!(type & (ZEROPAD + LEFT)))
+               while (size-- > 0)
+                       APPEND_BUFFER_SAFE(str, end, ' ');
+       if (sign)
+               APPEND_BUFFER_SAFE(str, end, sign);
+       if (type & SPECIAL) {
+               if (base == 8)
+                       APPEND_BUFFER_SAFE(str, end, '0');
+               else if (base == 16) {
+                       APPEND_BUFFER_SAFE(str, end, '0');
+                       APPEND_BUFFER_SAFE(str, end, 'x');
+               }
+       }
+       if (!(type & LEFT))
+               while (size-- > 0)
+                       APPEND_BUFFER_SAFE(str, end, c);
+       while (i < precision--)
+               APPEND_BUFFER_SAFE(str, end, '0');
+       while (i-- > 0)
+               APPEND_BUFFER_SAFE(str, end, tmp[i]);
+       while (size-- > 0)
+               APPEND_BUFFER_SAFE(str, end, ' ');
+
+       return str;
+}
+
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args)
+{
+       char *str, *end;
+       const char *s;
+       uint64_t num;
+       int i, base;
+       int len;
+
+       int flags;              /* flags to number() */
+
+       int field_width;        /* width of output field */
+       int precision;          /*
+                                * min. # of digits for integers; max
+                                * number of chars from a string
+                                */
+       int qualifier;          /* 'h', 'l', or 'L' for integer fields */
+
+       end = buf + n;
+       GUEST_ASSERT(buf < end);
+       GUEST_ASSERT(n > 0);
+
+       for (str = buf; *fmt; ++fmt) {
+               if (*fmt != '%') {
+                       APPEND_BUFFER_SAFE(str, end, *fmt);
+                       continue;
+               }
+
+               /* process flags */
+               flags = 0;
+repeat:
+               ++fmt;          /* this also skips first '%' */
+               switch (*fmt) {
+               case '-':
+                       flags |= LEFT;
+                       goto repeat;
+               case '+':
+                       flags |= PLUS;
+                       goto repeat;
+               case ' ':
+                       flags |= SPACE;
+                       goto repeat;
+               case '#':
+                       flags |= SPECIAL;
+                       goto repeat;
+               case '0':
+                       flags |= ZEROPAD;
+                       goto repeat;
+               }
+
+               /* get field width */
+               field_width = -1;
+               if (isdigit(*fmt))
+                       field_width = skip_atoi(&fmt);
+               else if (*fmt == '*') {
+                       ++fmt;
+                       /* it's the next argument */
+                       field_width = va_arg(args, int);
+                       if (field_width < 0) {
+                               field_width = -field_width;
+                               flags |= LEFT;
+                       }
+               }
+
+               /* get the precision */
+               precision = -1;
+               if (*fmt == '.') {
+                       ++fmt;
+                       if (isdigit(*fmt))
+                               precision = skip_atoi(&fmt);
+                       else if (*fmt == '*') {
+                               ++fmt;
+                               /* it's the next argument */
+                               precision = va_arg(args, int);
+                       }
+                       if (precision < 0)
+                               precision = 0;
+               }
+
+               /* get the conversion qualifier */
+               qualifier = -1;
+               if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+                       qualifier = *fmt;
+                       ++fmt;
+               }
+
+               /* default base */
+               base = 10;
+
+               switch (*fmt) {
+               case 'c':
+                       if (!(flags & LEFT))
+                               while (--field_width > 0)
+                                       APPEND_BUFFER_SAFE(str, end, ' ');
+                       APPEND_BUFFER_SAFE(str, end,
+                                           (uint8_t)va_arg(args, int));
+                       while (--field_width > 0)
+                               APPEND_BUFFER_SAFE(str, end, ' ');
+                       continue;
+
+               case 's':
+                       s = va_arg(args, char *);
+                       len = strnlen(s, precision);
+
+                       if (!(flags & LEFT))
+                               while (len < field_width--)
+                                       APPEND_BUFFER_SAFE(str, end, ' ');
+                       for (i = 0; i < len; ++i)
+                               APPEND_BUFFER_SAFE(str, end, *s++);
+                       while (len < field_width--)
+                               APPEND_BUFFER_SAFE(str, end, ' ');
+                       continue;
+
+               case 'p':
+                       if (field_width == -1) {
+                               field_width = 2 * sizeof(void *);
+                               flags |= SPECIAL | SMALL | ZEROPAD;
+                       }
+                       str = number(str, end,
+                                    (uint64_t)va_arg(args, void *), 16,
+                                    field_width, precision, flags);
+                       continue;
+
+               case 'n':
+                       if (qualifier == 'l') {
+                               long *ip = va_arg(args, long *);
+                               *ip = (str - buf);
+                       } else {
+                               int *ip = va_arg(args, int *);
+                               *ip = (str - buf);
+                       }
+                       continue;
+
+               case '%':
+                       APPEND_BUFFER_SAFE(str, end, '%');
+                       continue;
+
+               /* integer number formats - set up the flags and "break" */
+               case 'o':
+                       base = 8;
+                       break;
+
+               case 'x':
+                       flags |= SMALL;
+               case 'X':
+                       base = 16;
+                       break;
+
+               case 'd':
+               case 'i':
+                       flags |= SIGN;
+               case 'u':
+                       break;
+
+               default:
+                       APPEND_BUFFER_SAFE(str, end, '%');
+                       if (*fmt)
+                               APPEND_BUFFER_SAFE(str, end, *fmt);
+                       else
+                               --fmt;
+                       continue;
+               }
+               if (qualifier == 'l')
+                       num = va_arg(args, uint64_t);
+               else if (qualifier == 'h') {
+                       num = (uint16_t)va_arg(args, int);
+                       if (flags & SIGN)
+                               num = (int16_t)num;
+               } else if (flags & SIGN)
+                       num = va_arg(args, int);
+               else
+                       num = va_arg(args, uint32_t);
+               str = number(str, end, num, base, field_width, precision, flags);
+       }
+
+       GUEST_ASSERT(str < end);
+       *str = '\0';
+       return str - buf;
+}
+
+int guest_snprintf(char *buf, int n, const char *fmt, ...)
+{
+       va_list va;
+       int len;
+
+       va_start(va, fmt);
+       len = guest_vsnprintf(buf, n, fmt, va);
+       va_end(va);
+
+       return len;
+}
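
guest_snprintf() is what ucall_fmt() and ucall_assert() use to render into uc->buffer, but guest code can also call it directly to pre-format a message. A small hypothetical example; only the integer, char, string, and pointer conversions implemented above are supported, and there is no floating-point handling:

    static void guest_report_mapping(uint64_t gva, uint64_t gpa)
    {
            char msg[128];

            /* Format locally, then hand the finished string to the ucall layer. */
            guest_snprintf(msg, sizeof(msg), "gva 0x%lx -> gpa 0x%lx", gva, gpa);
            GUEST_PRINTF("%s", msg);
    }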
index 9741a7f..7a8af18 100644 (file)
@@ -312,6 +312,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
                                     uint32_t nr_runnable_vcpus,
                                     uint64_t extra_mem_pages)
 {
+       uint64_t page_size = vm_guest_mode_params[mode].page_size;
        uint64_t nr_pages;
 
        TEST_ASSERT(nr_runnable_vcpus,
@@ -340,6 +341,9 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
         */
        nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
 
+       /* Account for the number of pages needed by ucall. */
+       nr_pages += ucall_nr_pages_required(page_size);
+
        return vm_adjust_num_guest_pages(mode, nr_pages);
 }
 
@@ -994,7 +998,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
        if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
                alignment = max(backing_src_pagesz, alignment);
 
-       ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+       TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
 
        /* Add enough memory to align up if necessary */
        if (alignment > 1)
index 9a3476a..fe6d100 100644 (file)
 #include "kvm_util.h"
 #include "processor.h"
 
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
                        unsigned long arg1, unsigned long arg2,
                        unsigned long arg3, unsigned long arg4,
@@ -40,13 +36,6 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
        return ret;
 }
 
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
-                 KVM_RISCV_SELFTESTS_SBI_UCALL,
-                 uc, 0, 0, 0, 0, 0);
-}
-
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
index a7f02dc..cca9873 100644 (file)
@@ -6,16 +6,6 @@
  */
 #include "kvm_util.h"
 
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
-       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
-}
-
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
index 50e0cf4..88cb6b8 100644 (file)
@@ -634,7 +634,6 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
                                tmp = node_prev(s, nodep);
 
                        node_rm(s, nodep);
-                       nodep = NULL;
 
                        nodep = tmp;
                        reduction_performed = true;
index 632398a..5d1c872 100644 (file)
@@ -37,3 +37,12 @@ void *memset(void *s, int c, size_t count)
                *xs++ = c;
        return s;
 }
+
+size_t strnlen(const char *s, size_t count)
+{
+       const char *sc;
+
+       for (sc = s; count-- && *sc != '\0'; ++sc)
+               /* nothing */;
+       return sc - s;
+}
index 2f0e2ea..816a3fa 100644 (file)
@@ -11,6 +11,11 @@ struct ucall_header {
        struct ucall ucalls[KVM_MAX_VCPUS];
 };
 
+int ucall_nr_pages_required(uint64_t page_size)
+{
+       return align_up(sizeof(struct ucall_header), page_size) / page_size;
+}
+
 /*
  * ucall_pool holds per-VM values (global data is duplicated by each VM), it
  * must not be accessed from host code.
@@ -70,6 +75,45 @@ static void ucall_free(struct ucall *uc)
        clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
 }
 
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+                 unsigned int line, const char *fmt, ...)
+{
+       struct ucall *uc;
+       va_list va;
+
+       uc = ucall_alloc();
+       uc->cmd = cmd;
+
+       WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp));
+       WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file));
+       WRITE_ONCE(uc->args[GUEST_LINE], line);
+
+       va_start(va, fmt);
+       guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+       va_end(va);
+
+       ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+       ucall_free(uc);
+}
+
+void ucall_fmt(uint64_t cmd, const char *fmt, ...)
+{
+       struct ucall *uc;
+       va_list va;
+
+       uc = ucall_alloc();
+       uc->cmd = cmd;
+
+       va_start(va, fmt);
+       guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+       va_end(va);
+
+       ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+       ucall_free(uc);
+}
+
 void ucall(uint64_t cmd, int nargs, ...)
 {
        struct ucall *uc;
index d4a0b50..d828837 100644 (file)
@@ -1074,11 +1074,6 @@ static bool kvm_fixup_exception(struct ex_regs *regs)
        return true;
 }
 
-void kvm_exit_unexpected_vector(uint32_t value)
-{
-       ucall(UCALL_UNHANDLED, 1, value);
-}
-
 void route_exception(struct ex_regs *regs)
 {
        typedef void(*handler)(struct ex_regs *);
@@ -1092,7 +1087,10 @@ void route_exception(struct ex_regs *regs)
        if (kvm_fixup_exception(regs))
                return;
 
-       kvm_exit_unexpected_vector(regs->vector);
+       ucall_assert(UCALL_UNHANDLED,
+                    "Unhandled exception in guest", __FILE__, __LINE__,
+                    "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+                    regs->vector, regs->rip);
 }
 
 void vm_init_descriptor_tables(struct kvm_vm *vm)
@@ -1135,12 +1133,8 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
 {
        struct ucall uc;
 
-       if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
-               uint64_t vector = uc.args[0];
-
-               TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
-                         vector);
-       }
+       if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
+               REPORT_GUEST_ASSERT(uc);
 }
 
 const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
index 4d41dc6..1265cec 100644 (file)
@@ -8,14 +8,38 @@
 
 #define UCALL_PIO_PORT ((uint16_t)0x1000)
 
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
 void ucall_arch_do_ucall(vm_vaddr_t uc)
 {
-       asm volatile("in %[port], %%al"
-               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory");
+       /*
+        * FIXME: Revert this hack (the entire commit that added it) once nVMX
+        * preserves L2 GPRs across a nested VM-Exit.  If a ucall from L2, e.g.
+        * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+        * clobbered by L1.  Save and restore non-volatile GPRs (clobbering RBP
+        * in particular is problematic) along with RDX and RDI (which are
+        * inputs), and clobber volatile GPRs. *sigh*
+        */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+       "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+       asm volatile("push %%rbp\n\t"
+                    "push %%r15\n\t"
+                    "push %%r14\n\t"
+                    "push %%r13\n\t"
+                    "push %%r12\n\t"
+                    "push %%rbx\n\t"
+                    "push %%rdx\n\t"
+                    "push %%rdi\n\t"
+                    "in %[port], %%al\n\t"
+                    "pop %%rdi\n\t"
+                    "pop %%rdx\n\t"
+                    "pop %%rbx\n\t"
+                    "pop %%r12\n\t"
+                    "pop %%r13\n\t"
+                    "pop %%r14\n\t"
+                    "pop %%r15\n\t"
+                    "pop %%rbp\n\t"
+               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+                    HORRIFIC_L2_UCALL_CLOBBER_HACK);
 }
 
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
index feaf2be..6628dc4 100644 (file)
@@ -55,7 +55,7 @@ static void rendezvous_with_boss(void)
 static void run_vcpu(struct kvm_vcpu *vcpu)
 {
        vcpu_run(vcpu);
-       ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
 }
 
 static void *vcpu_worker(void *data)
index 4210cd2..20eb2e7 100644 (file)
@@ -157,7 +157,7 @@ static void *vcpu_worker(void *__data)
                                goto done;
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_1(uc, "val = %lu");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
@@ -560,7 +560,7 @@ static void guest_code_test_memslot_rw(void)
                     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
                        uint64_t val = *(uint64_t *)ptr;
 
-                       GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
+                       GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
                        *(uint64_t *)ptr = 0;
                }
 
index 1d73e78..c8e0a64 100644 (file)
@@ -237,8 +237,8 @@ static void test_get_cmma_basic(void)
 
        /* GET_CMMA_BITS without CMMA enabled should fail */
        rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno_out, ENXIO);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, ENXIO);
 
        enable_cmma(vm);
        vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
@@ -247,31 +247,31 @@ static void test_get_cmma_basic(void)
 
        /* GET_CMMA_BITS without migration mode and without peeking should fail */
        rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno_out, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
 
        /* GET_CMMA_BITS without migration mode and with peeking should work */
        rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
-       ASSERT_EQ(rc, 0);
-       ASSERT_EQ(errno_out, 0);
+       TEST_ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(errno_out, 0);
 
        enable_dirty_tracking(vm);
        enable_migration_mode(vm);
 
        /* GET_CMMA_BITS with invalid flags */
        rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno_out, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
 
        kvm_vm_free(vm);
 }
 
 static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
 {
-       ASSERT_EQ(vcpu->run->exit_reason, 13);
-       ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
-       ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
-       ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+       TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
 }
 
 static void test_migration_mode(void)
@@ -283,8 +283,8 @@ static void test_migration_mode(void)
 
        /* enabling migration mode on a VM without memory should fail */
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
        TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
        errno = 0;
 
@@ -304,8 +304,8 @@ static void test_migration_mode(void)
 
        /* migration mode when memslots have dirty tracking off should fail */
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
        TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
        errno = 0;
 
@@ -314,7 +314,7 @@ static void test_migration_mode(void)
 
        /* enabling migration mode should work now */
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(rc, 0);
        TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
        errno = 0;
 
@@ -350,7 +350,7 @@ static void test_migration_mode(void)
         */
        vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(rc, 0);
        TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
        errno = 0;
 
@@ -394,9 +394,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
        };
        memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
-       ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
-       ASSERT_EQ(args.start_gfn, 0);
+       TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
 
        /* ...and then - after a hole - the TEST_DATA memslot should follow */
        args = (struct kvm_s390_cmma_log){
@@ -407,9 +407,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
        };
        memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
-       ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
-       ASSERT_EQ(args.remaining, 0);
+       TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+       TEST_ASSERT_EQ(args.remaining, 0);
 
        /* ...and nothing else should be there */
        args = (struct kvm_s390_cmma_log){
@@ -420,9 +420,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
        };
        memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       ASSERT_EQ(args.count, 0);
-       ASSERT_EQ(args.start_gfn, 0);
-       ASSERT_EQ(args.remaining, 0);
+       TEST_ASSERT_EQ(args.count, 0);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
+       TEST_ASSERT_EQ(args.remaining, 0);
 }
 
 /**
@@ -498,11 +498,11 @@ static void assert_cmma_dirty(u64 first_dirty_gfn,
                              u64 dirty_gfn_count,
                              const struct kvm_s390_cmma_log *res)
 {
-       ASSERT_EQ(res->start_gfn, first_dirty_gfn);
-       ASSERT_EQ(res->count, dirty_gfn_count);
+       TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+       TEST_ASSERT_EQ(res->count, dirty_gfn_count);
        for (size_t i = 0; i < dirty_gfn_count; i++)
-               ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
-       ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+               TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
+       TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
 }
 
 static void test_get_skip_holes(void)
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c
new file mode 100644 (file)
index 0000000..84313fb
--- /dev/null
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define ICPT_INSTRUCTION 0x04
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[];
+asm("int_handler:\n"
+    "j .\n");
+
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+                                     size_t new_psw_off, uint64_t *new_psw)
+{
+       struct kvm_guest_debug debug = {};
+       struct kvm_regs regs;
+       struct kvm_vm *vm;
+       char *lowcore;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       lowcore = addr_gpa2hva(vm, 0);
+       new_psw[0] = (*vcpu)->run->psw_mask;
+       new_psw[1] = (uint64_t)int_handler;
+       memcpy(lowcore + new_psw_off, new_psw, 16);
+       vcpu_regs_get(*vcpu, &regs);
+       regs.gprs[2] = -1;
+       vcpu_regs_set(*vcpu, &regs);
+       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+       vcpu_guest_debug_set(*vcpu, &debug);
+       vcpu_run(*vcpu);
+
+       return vm;
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2];
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+    ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+    "j .\n");
+
+static void test_step_pgm(void)
+{
+       test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+    "diag %r0,%r0,0\n"
+    "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+       struct kvm_s390_irq irq = {
+               .type = KVM_S390_PROGRAM_INT,
+               .u.pgm.code = PGM_SPECIFICATION,
+       };
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2];
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+                            __LC_PGM_NEW_PSW, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
+       vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+    "iske %r2,%r2\n"
+    "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+       test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+    "lctl %c0,%c0,1\n"
+    "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+       test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+    "svc 0\n"
+    "j .\n");
+
+static void test_step_svc(void)
+{
+       test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+       const char *name;
+       void (*test)(void);
+} testlist[] = {
+       { "single-step pgm", test_step_pgm },
+       { "single-step pgm caused by diag", test_step_pgm_diag },
+       { "single-step pgm caused by iske", test_step_pgm_iske },
+       { "single-step pgm caused by lctl", test_step_pgm_lctl },
+       { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{
+       int idx;
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(testlist));
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               testlist[idx].test();
+               ksft_test_result_pass("%s\n", testlist[idx].name);
+       }
+       ksft_finished();
+}
index 8e4b94d..bb3ca9a 100644 (file)
@@ -4,7 +4,6 @@
  *
  * Copyright (C) 2019, Red Hat, Inc.
  */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -279,10 +278,10 @@ enum stage {
        vcpu_run(__vcpu);                                               \
        get_ucall(__vcpu, &uc);                                         \
        if (uc.cmd == UCALL_ABORT) {                                    \
-               REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu");           \
+               REPORT_GUEST_ASSERT(uc);                                \
        }                                                               \
-       ASSERT_EQ(uc.cmd, UCALL_SYNC);                                  \
-       ASSERT_EQ(uc.args[1], __stage);                                 \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                             \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                            \
 })                                                                     \
 
 static void prepare_mem12(void)
@@ -469,7 +468,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val)
        case 16:
                return val;
        }
-       GUEST_ASSERT_1(false, "Invalid size");
+       GUEST_FAIL("Invalid size = %u", size);
        return 0;
 }
 
@@ -598,7 +597,7 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t
                        return ret;
                }
        }
-       GUEST_ASSERT_1(false, "Invalid size");
+       GUEST_FAIL("Invalid size = %u", size);
        return 0;
 }
 
@@ -808,7 +807,7 @@ static void test_termination(void)
        HOST_SYNC(t.vcpu, STAGE_IDLED);
        MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
        /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
-       ASSERT_EQ(teid & teid_mask, 0);
+       TEST_ASSERT_EQ(teid & teid_mask, 0);
 
        kvm_vm_free(t.kvm_vm);
 }
index a9a0b76..c73f948 100644 (file)
@@ -4,7 +4,6 @@
  *
  * Copyright IBM Corp. 2021
  */
-
 #include <sys/mman.h>
 #include "test_util.h"
 #include "kvm_util.h"
@@ -156,7 +155,9 @@ static enum stage perform_next_stage(int *i, bool mapped_0)
                       !mapped_0;
                if (!skip) {
                        result = test_protection(tests[*i].addr, tests[*i].key);
-                       GUEST_ASSERT_2(result == tests[*i].expected, *i, result);
+                       __GUEST_ASSERT(result == tests[*i].expected,
+                                      "Wanted %u, got %u, for i = %u",
+                                      tests[*i].expected, result, *i);
                }
        }
        return stage;
@@ -190,9 +191,9 @@ static void guest_code(void)
        vcpu_run(__vcpu);                                       \
        get_ucall(__vcpu, &uc);                                 \
        if (uc.cmd == UCALL_ABORT)                              \
-               REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu");   \
-       ASSERT_EQ(uc.cmd, UCALL_SYNC);                          \
-       ASSERT_EQ(uc.args[1], __stage);                         \
+               REPORT_GUEST_ASSERT(uc);                        \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                     \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                    \
 })
 
 #define HOST_SYNC(vcpu, stage)                 \
index a849ce2..b329601 100644 (file)
@@ -88,7 +88,7 @@ static void *vcpu_worker(void *data)
        }
 
        if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
-               REPORT_GUEST_ASSERT_1(uc, "val = %lu");
+               REPORT_GUEST_ASSERT(uc);
 
        return NULL;
 }
@@ -156,19 +156,22 @@ static void guest_code_move_memory_region(void)
         * window where the memslot is invalid is usually quite small.
         */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+       __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+                      "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
 
        /* Spin until the misaligning memory region move completes. */
        val = guest_spin_on_val(MMIO_VAL);
-       GUEST_ASSERT_1(val == 1 || val == 0, val);
+       __GUEST_ASSERT(val == 1 || val == 0,
+                      "Expected '0' or '1' (no MMIO), got '%llx'", val);
 
        /* Spin until the memory region starts to get re-aligned. */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+       __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+                      "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
 
        /* Spin until the re-aligning memory region move completes. */
        val = guest_spin_on_val(MMIO_VAL);
-       GUEST_ASSERT_1(val == 1, val);
+       GUEST_ASSERT_EQ(val, 1);
 
        GUEST_DONE();
 }
@@ -224,15 +227,15 @@ static void guest_code_delete_memory_region(void)
 
        /* Spin until the memory region is deleted. */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == MMIO_VAL, val);
+       GUEST_ASSERT_EQ(val, MMIO_VAL);
 
        /* Spin until the memory region is recreated. */
        val = guest_spin_on_val(MMIO_VAL);
-       GUEST_ASSERT_1(val == 0, val);
+       GUEST_ASSERT_EQ(val, 0);
 
        /* Spin until the memory region is deleted. */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == MMIO_VAL, val);
+       GUEST_ASSERT_EQ(val, MMIO_VAL);
 
        asm("1:\n\t"
            ".pushsection .rodata\n\t"
@@ -249,7 +252,7 @@ static void guest_code_delete_memory_region(void)
            "final_rip_end: .quad 1b\n\t"
            ".popsection");
 
-       GUEST_ASSERT_1(0, 0);
+       GUEST_ASSERT(0);
 }
 
 static void test_delete_memory_region(void)
index c87f387..171adfb 100644 (file)
@@ -31,8 +31,8 @@ static uint64_t guest_stolen_time[NR_VCPUS];
 static void check_status(struct kvm_steal_time *st)
 {
        GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
-       GUEST_ASSERT(READ_ONCE(st->flags) == 0);
-       GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0);
 }
 
 static void guest_code(int cpu)
@@ -40,7 +40,7 @@ static void guest_code(int cpu)
        struct kvm_steal_time *st = st_gva[cpu];
        uint32_t version;
 
-       GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+       GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
 
        memset(st, 0, sizeof(*st));
        GUEST_SYNC(0);
@@ -122,8 +122,8 @@ static int64_t smccc(uint32_t func, uint64_t arg)
 
 static void check_status(struct st_time *st)
 {
-       GUEST_ASSERT(READ_ONCE(st->rev) == 0);
-       GUEST_ASSERT(READ_ONCE(st->attr) == 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->rev), 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->attr), 0);
 }
 
 static void guest_code(int cpu)
@@ -132,15 +132,15 @@ static void guest_code(int cpu)
        int64_t status;
 
        status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
-       GUEST_ASSERT(status == 0);
+       GUEST_ASSERT_EQ(status, 0);
        status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES);
-       GUEST_ASSERT(status == 0);
+       GUEST_ASSERT_EQ(status, 0);
        status = smccc(PV_TIME_FEATURES, PV_TIME_ST);
-       GUEST_ASSERT(status == 0);
+       GUEST_ASSERT_EQ(status, 0);
 
        status = smccc(PV_TIME_ST, 0);
-       GUEST_ASSERT(status != -1);
-       GUEST_ASSERT(status == (ulong)st_gva[cpu]);
+       GUEST_ASSERT_NE(status, -1);
+       GUEST_ASSERT_EQ(status, (ulong)st_gva[cpu]);
 
        st = (struct st_time *)status;
        GUEST_SYNC(0);
index d3c3aa9..3b34d81 100644 (file)
@@ -35,10 +35,10 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
                        guest_cpuid->entries[i].index,
                        &eax, &ebx, &ecx, &edx);
 
-               GUEST_ASSERT(eax == guest_cpuid->entries[i].eax &&
-                            ebx == guest_cpuid->entries[i].ebx &&
-                            ecx == guest_cpuid->entries[i].ecx &&
-                            edx == guest_cpuid->entries[i].edx);
+               GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+               GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+               GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+               GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
        }
 
 }
@@ -51,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid)
 
        GUEST_SYNC(2);
 
-       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001);
+       GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
 
        GUEST_DONE();
 }
@@ -116,7 +116,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
        case UCALL_DONE:
                return;
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+               REPORT_GUEST_ASSERT(uc);
        default:
                TEST_ASSERT(false, "Unexpected exit: %s",
                            exit_reason_str(vcpu->run->exit_reason));
index beb7e2c..634c6bf 100644 (file)
@@ -72,7 +72,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
 
                vcpu_run(vcpu);
 
-               ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+               TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
 
                vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
 
@@ -179,12 +179,12 @@ static void run_test(enum vm_guest_mode mode, void *unused)
         * with that capability.
         */
        if (dirty_log_manual_caps) {
-               ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
-               ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
-               ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+               TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+               TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
        } else {
-               ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
-               ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
        }
 
        /*
@@ -192,9 +192,9 @@ static void run_test(enum vm_guest_mode mode, void *unused)
         * memory again, the page counts should be the same as they were
         * right after initial population of memory.
         */
-       ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
-       ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
-       ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+       TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
+       TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+       TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
 }
 
 static void help(char *name)
index e334844..6c2e5e0 100644 (file)
@@ -35,7 +35,7 @@ int main(int argc, char *argv[])
        vcpu_run(vcpu);
        handle_flds_emulation_failure_exit(vcpu);
        vcpu_run(vcpu);
-       ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
 
        kvm_vm_free(vm);
        return 0;
index 73af44d..e036db1 100644 (file)
@@ -8,7 +8,6 @@
  * Copyright 2022 Google LLC
  * Author: Vipin Sharma <vipinsh@google.com>
  */
-
 #include "kvm_util.h"
 #include "processor.h"
 #include "hyperv.h"
@@ -84,7 +83,7 @@ int main(void)
 
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "arg1 = %ld, arg2 = %ld");
+               REPORT_GUEST_ASSERT(uc);
                break;
        case UCALL_DONE:
                break;
index 78606de..9f28aa2 100644 (file)
@@ -53,16 +53,21 @@ static void guest_msr(struct msr_data *msr)
                vector = rdmsr_safe(msr->idx, &msr_val);
 
        if (msr->fault_expected)
-               GUEST_ASSERT_3(vector == GP_VECTOR, msr->idx, vector, GP_VECTOR);
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
+                              msr->write ? "WR" : "RD", msr->idx, vector);
        else
-               GUEST_ASSERT_3(!vector, msr->idx, vector, 0);
+               __GUEST_ASSERT(!vector,
+                              "Expected success on %sMSR(0x%x), got vector '0x%x'",
+                              msr->write ? "WR" : "RD", msr->idx, vector);
 
        if (vector || is_write_only_msr(msr->idx))
                goto done;
 
        if (msr->write)
-               GUEST_ASSERT_3(msr_val == msr->write_val, msr->idx,
-                              msr_val, msr->write_val);
+               __GUEST_ASSERT(msr_val == msr->write_val,
+                              "WRMSR(0x%x) to '0x%llx', RDMSR read '0x%llx'",
+                              msr->idx, msr->write_val, msr_val);
 
        /* Invariant TSC bit appears when TSC invariant control MSR is written to */
        if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
@@ -82,7 +87,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
        u64 res, input, output;
        uint8_t vector;
 
-       GUEST_ASSERT(hcall->control);
+       GUEST_ASSERT_NE(hcall->control, 0);
 
        wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
        wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
@@ -96,10 +101,14 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 
        vector = __hyperv_hypercall(hcall->control, input, output, &res);
        if (hcall->ud_expected) {
-               GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
+               __GUEST_ASSERT(vector == UD_VECTOR,
+                              "Expected #UD for control '%u', got vector '0x%x'",
+                              hcall->control, vector);
        } else {
-               GUEST_ASSERT_2(!vector, hcall->control, vector);
-               GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res);
+               __GUEST_ASSERT(!vector,
+                              "Expected no exception for control '%u', got vector '0x%x'",
+                              hcall->control, vector);
+               GUEST_ASSERT_EQ(res, hcall->expect);
        }
 
        GUEST_DONE();
@@ -495,7 +504,7 @@ static void guest_test_msrs_access(void)
 
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_3(uc, "MSR = %lx, arg1 = %lx, arg2 = %lx");
+                       REPORT_GUEST_ASSERT(uc);
                        return;
                case UCALL_DONE:
                        break;
@@ -665,7 +674,7 @@ static void guest_test_hcalls_access(void)
 
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx");
+                       REPORT_GUEST_ASSERT(uc);
                        return;
                case UCALL_DONE:
                        break;
index f774a9e..9e2879a 100644 (file)
@@ -46,10 +46,10 @@ static void test_msr(struct msr_data *msr)
        PR_MSR(msr);
 
        vector = rdmsr_safe(msr->idx, &ignored);
-       GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
 
        vector = wrmsr_safe(msr->idx, 0);
-       GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
 }
 
 struct hcall_data {
@@ -77,7 +77,7 @@ static void test_hcall(struct hcall_data *hc)
 
        PR_HCALL(hc);
        r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
-       GUEST_ASSERT(r == -KVM_ENOSYS);
+       GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
 }
 
 static void guest_main(void)
@@ -125,7 +125,7 @@ static void enter_guest(struct kvm_vcpu *vcpu)
                        pr_hcall(&uc);
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_1(uc, "vector = %lu");
+                       REPORT_GUEST_ASSERT(uc);
                        return;
                case UCALL_DONE:
                        return;
index 7281264..80aa3d8 100644 (file)
@@ -16,14 +16,25 @@ enum monitor_mwait_testcases {
        MWAIT_DISABLED = BIT(2),
 };
 
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector)             \
+do {                                                                   \
+       bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) &&      \
+                           ((testcase) & MWAIT_DISABLED);              \
+                                                                       \
+       if (fault_wanted)                                               \
+               __GUEST_ASSERT((vector) == UD_VECTOR,                   \
+                              "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", (testcase), (vector)); \
+       else                                                            \
+               __GUEST_ASSERT(!(vector),                               \
+                              "Expected success on " insn " for testcase '0x%x', got '0x%x'", (testcase), (vector)); \
+} while (0)
+
 static void guest_monitor_wait(int testcase)
 {
-       /*
-        * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD,
-        * in all other scenarios KVM should emulate them as nops.
-        */
-       bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) &&
-                           (testcase & MWAIT_DISABLED);
        u8 vector;
 
        GUEST_SYNC(testcase);
@@ -33,16 +44,10 @@ static void guest_monitor_wait(int testcase)
         * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
         */
        vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       if (fault_wanted)
-               GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
-       else
-               GUEST_ASSERT_2(!vector, testcase, vector);
+       GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
 
        vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       if (fault_wanted)
-               GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
-       else
-               GUEST_ASSERT_2(!vector, testcase, vector);
+       GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
 }
 
 static void guest_code(void)
@@ -85,7 +90,7 @@ int main(int argc, char *argv[])
                        testcase = uc.args[1];
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld");
+                       REPORT_GUEST_ASSERT(uc);
                        goto done;
                case UCALL_DONE:
                        goto done;
index 6502aa2..3670331 100644 (file)
@@ -180,9 +180,7 @@ static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
                            "Expected L2 to ask for %d, L2 says it's done", vector);
                break;
        case UCALL_ABORT:
-               TEST_FAIL("%s at %s:%ld (0x%lx != 0x%lx)",
-                         (const char *)uc.args[0], __FILE__, uc.args[1],
-                         uc.args[2], uc.args[3]);
+               REPORT_GUEST_ASSERT(uc);
                break;
        default:
                TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
@@ -247,12 +245,12 @@ int main(int argc, char *argv[])
 
        /* Verify the pending events comes back out the same as it went in. */
        vcpu_events_get(vcpu, &events);
-       ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
-                 KVM_VCPUEVENT_VALID_PAYLOAD);
-       ASSERT_EQ(events.exception.pending, true);
-       ASSERT_EQ(events.exception.nr, SS_VECTOR);
-       ASSERT_EQ(events.exception.has_error_code, true);
-       ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+       TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+                       KVM_VCPUEVENT_VALID_PAYLOAD);
+       TEST_ASSERT_EQ(events.exception.pending, true);
+       TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+       TEST_ASSERT_EQ(events.exception.has_error_code, true);
+       TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
 
        /*
         * Run for real with the pending #SS, L1 should get a VM-Exit due to
index 40507ed..283cc55 100644 (file)
 #define ARCH_PERFMON_BRANCHES_RETIRED          5
 
 #define NUM_BRANCHES 42
+#define INTEL_PMC_IDX_FIXED            32
+
+/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
+#define MAX_FILTER_EVENTS              300
+#define MAX_TEST_EVENTS                10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION                (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS                 (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS               (MAX_FILTER_EVENTS + 1)
 
 /*
  * This is how the event selector and unit mask are stored in an AMD
 
 #define INST_RETIRED EVENT(0xc0, 0)
 
+struct __kvm_pmu_event_filter {
+       __u32 action;
+       __u32 nevents;
+       __u32 fixed_counter_bitmap;
+       __u32 flags;
+       __u32 pad[4];
+       __u64 events[MAX_FILTER_EVENTS];
+};
+
 /*
  * This event list comprises Intel's eight architectural events plus
  * AMD's "retired branch instructions" for Zen[123] (and possibly
  * other AMD CPUs).
  */
-static const uint64_t event_list[] = {
-       EVENT(0x3c, 0),
-       INST_RETIRED,
-       EVENT(0x3c, 1),
-       EVENT(0x2e, 0x4f),
-       EVENT(0x2e, 0x41),
-       EVENT(0xc4, 0),
-       EVENT(0xc5, 0),
-       EVENT(0xa4, 1),
-       AMD_ZEN_BR_RETIRED,
+static const struct __kvm_pmu_event_filter base_event_filter = {
+       .nevents = ARRAY_SIZE(base_event_filter.events),
+       .events = {
+               EVENT(0x3c, 0),
+               INST_RETIRED,
+               EVENT(0x3c, 1),
+               EVENT(0x2e, 0x4f),
+               EVENT(0x2e, 0x41),
+               EVENT(0xc4, 0),
+               EVENT(0xc5, 0),
+               EVENT(0xa4, 1),
+               AMD_ZEN_BR_RETIRED,
+       },
 };
 
 struct {
@@ -225,48 +246,11 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
        return !r;
 }
 
-static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
-{
-       struct kvm_pmu_event_filter *f;
-       int size = sizeof(*f) + nevents * sizeof(f->events[0]);
-
-       f = malloc(size);
-       TEST_ASSERT(f, "Out of memory");
-       memset(f, 0, size);
-       f->nevents = nevents;
-       return f;
-}
-
-
-static struct kvm_pmu_event_filter *
-create_pmu_event_filter(const uint64_t event_list[], int nevents,
-                       uint32_t action, uint32_t flags)
-{
-       struct kvm_pmu_event_filter *f;
-       int i;
-
-       f = alloc_pmu_event_filter(nevents);
-       f->action = action;
-       f->flags = flags;
-       for (i = 0; i < nevents; i++)
-               f->events[i] = event_list[i];
-
-       return f;
-}
-
-static struct kvm_pmu_event_filter *event_filter(uint32_t action)
-{
-       return create_pmu_event_filter(event_list,
-                                      ARRAY_SIZE(event_list),
-                                      action, 0);
-}
-
 /*
  * Remove the first occurrence of 'event' (if any) from the filter's
  * event list.
  */
-static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
-                                                uint64_t event)
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
 {
        bool found = false;
        int i;
@@ -279,7 +263,6 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
        }
        if (found)
                f->nevents--;
-       return f;
 }
 
 #define ASSERT_PMC_COUNTING_INSTRUCTIONS()                                             \
@@ -315,66 +298,73 @@ static void test_without_filter(struct kvm_vcpu *vcpu)
 }
 
 static void test_with_filter(struct kvm_vcpu *vcpu,
-                            struct kvm_pmu_event_filter *f)
+                            struct __kvm_pmu_event_filter *__f)
 {
+       struct kvm_pmu_event_filter *f = (void *)__f;
+
        vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
        run_vcpu_and_sync_pmc_results(vcpu);
 }
 
 static void test_amd_deny_list(struct kvm_vcpu *vcpu)
 {
-       uint64_t event = EVENT(0x1C2, 0);
-       struct kvm_pmu_event_filter *f;
+       struct __kvm_pmu_event_filter f = {
+               .action = KVM_PMU_EVENT_DENY,
+               .nevents = 1,
+               .events = {
+                       EVENT(0x1C2, 0),
+               },
+       };
 
-       f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0);
-       test_with_filter(vcpu, f);
-       free(f);
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
 }
 
 static void test_member_deny_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+       struct __kvm_pmu_event_filter f = base_event_filter;
 
-       test_with_filter(vcpu, f);
-       free(f);
+       f.action = KVM_PMU_EVENT_DENY;
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
 }
 
 static void test_member_allow_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+       struct __kvm_pmu_event_filter f = base_event_filter;
 
-       test_with_filter(vcpu, f);
-       free(f);
+       f.action = KVM_PMU_EVENT_ALLOW;
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
 }
 
 static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+       struct __kvm_pmu_event_filter f = base_event_filter;
 
-       remove_event(f, INST_RETIRED);
-       remove_event(f, INTEL_BR_RETIRED);
-       remove_event(f, AMD_ZEN_BR_RETIRED);
-       test_with_filter(vcpu, f);
-       free(f);
+       f.action = KVM_PMU_EVENT_DENY;
+
+       remove_event(&f, INST_RETIRED);
+       remove_event(&f, INTEL_BR_RETIRED);
+       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
 }
 
 static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_ALLOW;
 
-       remove_event(f, INST_RETIRED);
-       remove_event(f, INTEL_BR_RETIRED);
-       remove_event(f, AMD_ZEN_BR_RETIRED);
-       test_with_filter(vcpu, f);
-       free(f);
+       remove_event(&f, INST_RETIRED);
+       remove_event(&f, INTEL_BR_RETIRED);
+       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
 }
@@ -569,19 +559,16 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu,
                                   const uint64_t masked_events[],
                                   const int nmasked_events)
 {
-       struct kvm_pmu_event_filter *f;
+       struct __kvm_pmu_event_filter f = {
+               .nevents = nmasked_events,
+               .action = KVM_PMU_EVENT_ALLOW,
+               .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+       };
 
-       f = create_pmu_event_filter(masked_events, nmasked_events,
-                                   KVM_PMU_EVENT_ALLOW,
-                                   KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
-       test_with_filter(vcpu, f);
-       free(f);
+       memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+       test_with_filter(vcpu, &f);
 }
 
-/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
-#define MAX_FILTER_EVENTS      300
-#define MAX_TEST_EVENTS                10
-
 #define ALLOW_LOADS            BIT(0)
 #define ALLOW_STORES           BIT(1)
 #define ALLOW_LOADS_STORES     BIT(2)
@@ -753,21 +740,33 @@ static void test_masked_events(struct kvm_vcpu *vcpu)
        run_masked_events_tests(vcpu, events, nevents);
 }
 
-static int run_filter_test(struct kvm_vcpu *vcpu, const uint64_t *events,
-                          int nevents, uint32_t flags)
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+                               struct __kvm_pmu_event_filter *__f)
 {
-       struct kvm_pmu_event_filter *f;
-       int r;
+       struct kvm_pmu_event_filter *f = (void *)__f;
 
-       f = create_pmu_event_filter(events, nevents, KVM_PMU_EVENT_ALLOW, flags);
-       r = __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
-       free(f);
+       return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
 
-       return r;
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+                                      uint32_t flags, uint32_t action)
+{
+       struct __kvm_pmu_event_filter f = {
+               .nevents = 1,
+               .flags = flags,
+               .action = action,
+               .events = {
+                       event,
+               },
+       };
+
+       return set_pmu_event_filter(vcpu, &f);
 }
 
 static void test_filter_ioctl(struct kvm_vcpu *vcpu)
 {
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct __kvm_pmu_event_filter f;
        uint64_t e = ~0ul;
        int r;
 
@@ -775,15 +774,144 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu)
         * Unfortunately having invalid bits set in event data is expected to
         * pass when flags == 0 (bits other than eventsel+umask).
         */
-       r = run_filter_test(vcpu, &e, 1, 0);
+       r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
        TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
 
-       r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
        TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
 
        e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
-       r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
        TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+       f = base_event_filter;
+       f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+       f = base_event_filter;
+       f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+       f = base_event_filter;
+       f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+       f = base_event_filter;
+       f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx)
+{
+       for (;;) {
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0);
+
+               /* Only OS_EN bit is enabled for fixed counter[idx]. */
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL,
+                     BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx));
+               __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx));
+       }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+                                              uint32_t action, uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = {
+               .action = action,
+               .fixed_counter_bitmap = bitmap,
+       };
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+                                                  uint32_t action,
+                                                  uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = action;
+       f.fixed_counter_bitmap = bitmap;
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+                                       uint8_t nr_fixed_counters)
+{
+       unsigned int i;
+       uint32_t bitmap;
+       uint64_t count;
+
+       TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+                   "Invalid nr_fixed_counters");
+
+       /*
+        * Check the fixed performance counter can count normally when KVM
+        * userspace doesn't set any pmu filter.
+        */
+       count = run_vcpu_to_sync(vcpu);
+       TEST_ASSERT(count, "Unexpected count value: %lu", count);
+
+       for (i = 0; i < BIT(nr_fixed_counters); i++) {
+               bitmap = BIT(i);
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+               /*
+                * Check that fixed_counter_bitmap has higher priority than
+                * events[] when both are set.
+                */
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_ALLOW,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_DENY,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+       }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       uint8_t idx;
+
+       /*
+        * Check that pmu_event_filter works as expected when it's applied to
+        * fixed performance counters.
+        */
+       for (idx = 0; idx < nr_fixed_counters; idx++) {
+               vm = vm_create_with_one_vcpu(&vcpu,
+                                            intel_run_fixed_counter_guest_code);
+               vcpu_args_set(vcpu, 1, idx);
+               __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+               kvm_vm_free(vm);
+       }
 }
 
 int main(int argc, char *argv[])
@@ -829,6 +957,7 @@ int main(int argc, char *argv[])
        kvm_vm_free(vm);
 
        test_pmu_config_disable(guest_code);
+       test_fixed_counter_bitmap();
 
        return 0;
 }
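The on-stack __kvm_pmu_event_filter mirrors the uapi struct kvm_pmu_event_filter but with a fixed-size events[] array, so filters can be built by value and handed straight to the vm ioctl. A minimal, self-contained sketch of the same ioctl usage outside the selftests harness (vm_fd is an assumed, already-created KVM VM file descriptor; error handling omitted):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Deny one fixed counter; nevents == 0 leaves the flexible events[] empty. */
	static int deny_fixed_counter(int vm_fd, unsigned int fixed_idx)
	{
		struct kvm_pmu_event_filter f = {
			.action = KVM_PMU_EVENT_DENY,
			.fixed_counter_bitmap = 1u << fixed_idx,
		};

		return ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, &f);
	}

As in test_set_gp_and_fixed_event_filter() above, a non-zero fixed_counter_bitmap takes precedence over events[] for the fixed counters it covers.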
index 4c416eb..cbc92a8 100644 (file)
@@ -57,7 +57,7 @@ int main(void)
        for (i = 0; i < KVM_MAX_VCPUS; i++)
                vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
 
-       ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
 
        vcpuN = vcpus[KVM_MAX_VCPUS - 1];
        for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
@@ -65,8 +65,8 @@ int main(void)
                vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
        }
 
-       ASSERT_EQ(pthread_cancel(thread), 0);
-       ASSERT_EQ(pthread_join(thread, NULL), 0);
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
 
        kvm_vm_free(vm);
 
index b25d755..366cf18 100644 (file)
@@ -20,7 +20,7 @@ static void guest_bsp_vcpu(void *arg)
 {
        GUEST_SYNC(1);
 
-       GUEST_ASSERT(get_bsp_flag() != 0);
+       GUEST_ASSERT_NE(get_bsp_flag(), 0);
 
        GUEST_DONE();
 }
@@ -29,7 +29,7 @@ static void guest_not_bsp_vcpu(void *arg)
 {
        GUEST_SYNC(1);
 
-       GUEST_ASSERT(get_bsp_flag() == 0);
+       GUEST_ASSERT_EQ(get_bsp_flag(), 0);
 
        GUEST_DONE();
 }
@@ -65,7 +65,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu)
                                        stage);
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+                       REPORT_GUEST_ASSERT(uc);
                default:
                        TEST_ASSERT(false, "Unexpected exit: %s",
                                    exit_reason_str(vcpu->run->exit_reason));
index 4e24797..7ee4449 100644 (file)
@@ -8,7 +8,6 @@
  *   Copyright (C) 2021, Red Hat, Inc.
  *
  */
-
 #include <stdatomic.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -34,13 +33,12 @@ static void l2_guest_code_int(void);
 static void guest_int_handler(struct ex_regs *regs)
 {
        int_fired++;
-       GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int,
-                      regs->rip, (unsigned long)l2_guest_code_int);
+       GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
 }
 
 static void l2_guest_code_int(void)
 {
-       GUEST_ASSERT_1(int_fired == 1, int_fired);
+       GUEST_ASSERT_EQ(int_fired, 1);
 
        /*
          * Same as the vmmcall() function, but with a ud2 sneaked after the
@@ -53,7 +51,7 @@ static void l2_guest_code_int(void)
                              : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
                                "r10", "r11", "r12", "r13", "r14", "r15");
 
-       GUEST_ASSERT_1(bp_fired == 1, bp_fired);
+       GUEST_ASSERT_EQ(bp_fired, 1);
        hlt();
 }
 
@@ -66,9 +64,9 @@ static void guest_nmi_handler(struct ex_regs *regs)
 
        if (nmi_stage_get() == 1) {
                vmmcall();
-               GUEST_ASSERT(false);
+               GUEST_FAIL("Unexpected resume after VMMCALL");
        } else {
-               GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get());
+               GUEST_ASSERT_EQ(nmi_stage_get(), 3);
                GUEST_DONE();
        }
 }
@@ -104,7 +102,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
        }
 
        run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+                      "Expected VMMCALL #VMEXIT, got '0x%x', info1 = '0x%llx', info2 = '0x%llx'",
                       vmcb->control.exit_code,
                       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
 
@@ -112,7 +111,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
                clgi();
                x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
 
-               GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get());
+               GUEST_ASSERT_EQ(nmi_stage_get(), 1);
                nmi_stage_inc();
 
                stgi();
@@ -133,7 +132,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
        vmcb->control.next_rip = vmcb->save.rip + 2;
 
        run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT,
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+                      "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%llx', info2 = '0x%llx'",
                       vmcb->control.exit_code,
                       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
 
@@ -185,7 +185,7 @@ static void run_test(bool is_nmi)
 
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx");
+               REPORT_GUEST_ASSERT(uc);
                break;
                /* NOT REACHED */
        case UCALL_DONE:
index 2da89fd..00965ba 100644 (file)
@@ -15,6 +15,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/ioctl.h>
+#include <pthread.h>
 
 #include "test_util.h"
 #include "kvm_util.h"
@@ -80,6 +81,133 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
 #define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
 #define INVALID_SYNC_FIELD 0x80000000
 
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       WRITE_ONCE(events->exception.nr, UD_VECTOR);
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.injected, 1);
+               WRITE_ONCE(events->exception.pending, 1);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events.  KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.nr, UD_VECTOR);
+               WRITE_ONCE(events->exception.pending, 1);
+               WRITE_ONCE(events->exception.nr, 255);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU heavily relies on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       __u64 *cr4 = &run->s.regs.sregs.cr4;
+       __u64 pae_enabled = *cr4;
+       __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+               WRITE_ONCE(*cr4, pae_enabled);
+               asm volatile(".rept 512\n\t"
+                            "nop\n\t"
+                            ".endr");
+               WRITE_ONCE(*cr4, pae_disabled);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+static void race_sync_regs(void *racer)
+{
+       const time_t TIMEOUT = 2; /* seconds, roughly */
+       struct kvm_x86_state *state;
+       struct kvm_translation tr;
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       pthread_t thread;
+       time_t t;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+       vcpu_run(vcpu);
+       run->kvm_valid_regs = 0;
+
+       /* Save state *before* spawning the thread that mucks with vCPU state. */
+       state = vcpu_save_state(vcpu);
+
+       /*
+        * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
+        * should already be set in guest state.
+        */
+       TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+                   (run->s.regs.sregs.efer & EFER_LME),
+                   "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+                   !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+                   !!(run->s.regs.sregs.efer & EFER_LME));
+
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+               /*
+                * Reload known good state if the vCPU triple faults, e.g. due
+                * to the unhandled #GPs being injected.  VMX preserves state
+                * on shutdown, but SVM synthesizes an INIT as the VMCB state
+                * is architecturally undefined on triple fault.
+                */
+               if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+                       vcpu_load_state(vcpu, state);
+
+               if (racer == race_sregs_cr4) {
+                       tr = (struct kvm_translation) { .linear_address = 0 };
+                       __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+               }
+       }
+
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+       kvm_x86_state_cleanup(state);
+       kvm_vm_free(vm);
+}
+
 int main(int argc, char *argv[])
 {
        struct kvm_vcpu *vcpu;
@@ -218,5 +346,9 @@ int main(int argc, char *argv[])
 
        kvm_vm_free(vm);
 
+       race_sync_regs(race_sregs_cr4);
+       race_sync_regs(race_events_exc);
+       race_sync_regs(race_events_inj_pen);
+
        return 0;
 }
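Each racer passed to race_sync_regs() follows the same contract: a pthread start routine that loops, marking KVM_SYNC_X86_* state dirty and mutating the corresponding run->s.regs fields, and calling pthread_testcancel() so the timeout-bounded loop in race_sync_regs() can stop it. A purely hypothetical extra racer, shown only to illustrate that shape (it is not part of the test and assumes the same headers as sync_regs_test.c):

	static void *race_events_noop(void *arg)
	{
		struct kvm_run *run = arg;
		struct kvm_vcpu_events *events = &run->s.regs.events;

		for (;;) {
			/* Mark events dirty without queueing anything. */
			WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
			WRITE_ONCE(events->flags, 0);

			pthread_testcancel();
		}

		return NULL;
	}

It would be invoked as race_sync_regs(race_events_noop), exactly like the three racers added above.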
index c9f6770..12b0964 100644 (file)
@@ -84,7 +84,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
                ksft_test_result_pass("stage %d passed\n", stage + 1);
                return;
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+               REPORT_GUEST_ASSERT(uc);
        default:
                TEST_ASSERT(false, "Unexpected exit: %s",
                            exit_reason_str(vcpu->run->exit_reason));
@@ -103,39 +103,39 @@ int main(void)
        vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
        val = 0;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
        run_vcpu(vcpu, 1);
        val = 1ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
        run_vcpu(vcpu, 2);
        val = 2ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /*
         * Host: writes to MSR_IA32_TSC set the host-side offset
         * and therefore do not change MSR_IA32_TSC_ADJUST.
         */
        vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
        run_vcpu(vcpu, 3);
 
        /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
        vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
 
        /* Restore previous value.  */
        vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /*
         * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
@@ -143,8 +143,8 @@ int main(void)
         */
        run_vcpu(vcpu, 4);
        val = 3ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /*
         * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
@@ -152,8 +152,8 @@ int main(void)
         */
        run_vcpu(vcpu, 5);
        val = 4ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
 
        kvm_vm_free(vm);
 
index 0cb51fa..255c50b 100644 (file)
@@ -20,8 +20,8 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count)
                end = (unsigned long)buffer + 8192;
 
        asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
-       GUEST_ASSERT_1(count == 0, count);
-       GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end);
+       GUEST_ASSERT_EQ(count, 0);
+       GUEST_ASSERT_EQ((unsigned long)buffer, end);
 }
 
 static void guest_code(void)
@@ -43,7 +43,9 @@ static void guest_code(void)
        memset(buffer, 0, sizeof(buffer));
        guest_ins_port80(buffer, 8192);
        for (i = 0; i < 8192; i++)
-               GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]);
+               __GUEST_ASSERT(buffer[i] == 0xaa,
+                              "Expected '0xaa', got '0x%x' at buffer[%u]",
+                              buffer[i], i);
 
        GUEST_DONE();
 }
@@ -91,7 +93,7 @@ int main(int argc, char *argv[])
        case UCALL_DONE:
                break;
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx");
+               REPORT_GUEST_ASSERT(uc);
        default:
                TEST_FAIL("Unknown ucall %lu", uc.cmd);
        }
index be0bdb8..a9b827c 100644 (file)
@@ -50,7 +50,7 @@ static void set_timer(void)
        timer.it_value.tv_sec  = 0;
        timer.it_value.tv_usec = 200;
        timer.it_interval = timer.it_value;
-       ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+       TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
 }
 
 static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
index 4c90f76..ebbcb0a 100644 (file)
@@ -10,7 +10,6 @@
  * and check it can be retrieved with KVM_GET_MSR, also test
  * the invalid LBR formats are rejected.
  */
-
 #define _GNU_SOURCE /* for program_invocation_short_name */
 #include <sys/ioctl.h>
 
@@ -52,23 +51,24 @@ static const union perf_capabilities format_caps = {
        .pebs_format = -1,
 };
 
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+       uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+
+       __GUEST_ASSERT(vector == GP_VECTOR,
+                      "Expected #GP for value '0x%llx', got vector '0x%x'",
+                      val, vector);
+}
+
 static void guest_code(uint64_t current_val)
 {
-       uint8_t vector;
        int i;
 
-       vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val);
-       GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector);
-
-       vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0);
-       GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector);
+       guest_test_perf_capabilities_gp(current_val);
+       guest_test_perf_capabilities_gp(0);
 
-       for (i = 0; i < 64; i++) {
-               vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES,
-                                   current_val ^ BIT_ULL(i));
-               GUEST_ASSERT_2(vector == GP_VECTOR,
-                              current_val ^ BIT_ULL(i), vector);
-       }
+       for (i = 0; i < 64; i++)
+               guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
 
        GUEST_DONE();
 }
@@ -95,7 +95,7 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
 
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu");
+               REPORT_GUEST_ASSERT(uc);
                break;
        case UCALL_DONE:
                break;
@@ -103,7 +103,8 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
                TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
        }
 
-       ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities);
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+                       host_cap.capabilities);
 
        vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
 
index 396c13f..ab75b87 100644 (file)
@@ -65,17 +65,17 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
        vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
 
        vcpu_run(vcpu);
-       ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
-       ASSERT_EQ(uc.args[1], val);
+       TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+       TEST_ASSERT_EQ(uc.args[1], val);
 
        vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
        icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
              (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
        if (!x->is_x2apic) {
                val &= (-1u | (0xffull << (32 + 24)));
-               ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+               TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
        } else {
-               ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+               TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
        }
 }
 
index 905bd5a..77d04a7 100644 (file)
@@ -4,7 +4,6 @@
  *
  * Copyright (C) 2022, Google LLC.
  */
-
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
  * Assert that architectural dependency rules are satisfied, e.g. that AVX is
  * supported if and only if SSE is supported.
  */
-#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)    \
-do {                                                                             \
-       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
-                                                                                 \
-       GUEST_ASSERT_3((__supported & (xfeatures)) != (xfeatures) ||              \
-                      __supported == ((xfeatures) | (dependencies)),             \
-                      __supported, (xfeatures), (dependencies));                 \
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)          \
+do {                                                                                   \
+       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies));       \
+                                                                                       \
+       __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) ||                    \
+                      __supported == ((xfeatures) | (dependencies)),                   \
+                      "supported = 0x%llx, xfeatures = 0x%llx, dependencies = 0x%llx", \
+                      __supported, (xfeatures), (dependencies));                       \
 } while (0)
 
 /*
@@ -41,7 +41,8 @@ do {                                                                            \
 do {                                                                   \
        uint64_t __supported = (supported_xcr0) & (xfeatures);          \
                                                                        \
-       GUEST_ASSERT_2(!__supported || __supported == (xfeatures),      \
+       __GUEST_ASSERT(!__supported || __supported == (xfeatures),      \
+                      "supported = 0x%llx, xfeatures = 0x%llx",        \
                       __supported, (xfeatures));                       \
 } while (0)
 
@@ -79,14 +80,18 @@ static void guest_code(void)
                                    XFEATURE_MASK_XTILE);
 
        vector = xsetbv_safe(0, supported_xcr0);
-       GUEST_ASSERT_2(!vector, supported_xcr0, vector);
+       __GUEST_ASSERT(!vector,
+                      "Expected success on XSETBV(0x%llx), got vector '0x%x'",
+                      supported_xcr0, vector);
 
        for (i = 0; i < 64; i++) {
                if (supported_xcr0 & BIT_ULL(i))
                        continue;
 
                vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
-               GUEST_ASSERT_3(vector == GP_VECTOR, supported_xcr0, vector, BIT_ULL(i));
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on XSETBV(0x%llx), supported XCR0 = 0x%llx, got vector '0x%x'",
+                              BIT_ULL(i), supported_xcr0, vector);
        }
 
        GUEST_DONE();
@@ -117,7 +122,7 @@ int main(int argc, char *argv[])
 
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_3(uc, "0x%lx 0x%lx 0x%lx");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
index c94cde3..e149d05 100644 (file)
@@ -108,16 +108,16 @@ int main(int argc, char *argv[])
                vcpu_run(vcpu);
 
                if (run->exit_reason == KVM_EXIT_XEN) {
-                       ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
-                       ASSERT_EQ(run->xen.u.hcall.cpl, 0);
-                       ASSERT_EQ(run->xen.u.hcall.longmode, 1);
-                       ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
-                       ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
-                       ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
-                       ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
-                       ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
-                       ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
-                       ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+                       TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
                        run->xen.u.hcall.result = RETVALUE;
                        continue;
                }
index 4adaad1..2029455 100644 (file)
@@ -57,9 +57,14 @@ enum {
 
 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
 /* Just the flags we need, copied from mm.h: */
+
+#ifndef FOLL_WRITE
 #define FOLL_WRITE     0x01    /* check pte is writable */
-#define FOLL_LONGTERM   0x10000 /* mapping lifetime is indefinite */
+#endif
 
+#ifndef FOLL_LONGTERM
+#define FOLL_LONGTERM   0x100 /* mapping lifetime is indefinite */
+#endif
 FIXTURE(hmm)
 {
        int             fd;
index 435aceb..380b691 100644 (file)
@@ -831,6 +831,7 @@ int main(int argc, char *argv[])
                                printf("Size must be greater than 0\n");
                                return KSFT_FAIL;
                        }
+                       break;
                case 't':
                        {
                                int tmp = atoi(optarg);
index 501854a..2f9d378 100644 (file)
@@ -15,6 +15,7 @@ ip_local_port_range
 ipsec
 ipv6_flowlabel
 ipv6_flowlabel_mgr
+log.txt
 msg_zerocopy
 nettest
 psock_fanout
@@ -45,6 +46,7 @@ test_unix_oob
 timestamping
 tls
 toeplitz
+tools
 tun
 txring_overwrite
 txtimestamp
index 0f5e88c..df8d90b 100755 (executable)
@@ -1981,6 +1981,11 @@ basic()
 
        run_cmd "$IP link set dev lo up"
 
+       # Dump should not loop endlessly when maximum nexthop ID is configured.
+       run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
+       run_cmd "timeout 5 $IP nexthop"
+       log_test $? 0 "Maximum nexthop ID dump"
+
        #
        # groups
        #
@@ -2201,6 +2206,11 @@ basic_res()
        run_cmd "$IP nexthop bucket list fdb"
        log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
 
+       # Dump should not loop endlessly when maximum nexthop ID is configured.
+       run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
+       run_cmd "timeout 5 $IP nexthop bucket"
+       log_test $? 0 "Maximum nexthop ID dump"
+
        #
        # resilient nexthop buckets get requests
        #
index ae3f946..d0c6c49 100755 (executable)
@@ -617,7 +617,7 @@ __cfg_test_port_ip_sg()
                grep -q "permanent"
        check_err $? "Entry not added as \"permanent\" when should"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_err $? "\"permanent\" entry has a pending group timer"
        bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -626,7 +626,7 @@ __cfg_test_port_ip_sg()
                grep -q "temp"
        check_err $? "Entry not added as \"temp\" when should"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_fail $? "\"temp\" entry has an unpending group timer"
        bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -659,7 +659,7 @@ __cfg_test_port_ip_sg()
                grep -q "permanent"
        check_err $? "Entry not marked as \"permanent\" after replace"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_err $? "Entry has a pending group timer after replace"
 
        bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
@@ -667,7 +667,7 @@ __cfg_test_port_ip_sg()
                grep -q "temp"
        check_err $? "Entry not marked as \"temp\" after replace"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_fail $? "Entry has an unpending group timer after replace"
        bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -850,6 +850,7 @@ cfg_test()
 __fwd_test_host_ip()
 {
        local grp=$1; shift
+       local dmac=$1; shift
        local src=$1; shift
        local mode=$1; shift
        local name
@@ -872,27 +873,27 @@ __fwd_test_host_ip()
        # Packet should only be flooded to multicast router ports when there is
        # no matching MDB entry. The bridge is not configured as a multicast
        # router port.
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 0
        check_err $? "Packet locally received after flood"
 
        # Install a regular port group entry and expect the packet to not be
        # locally received.
        bridge mdb add dev br0 port $swp2 grp $grp temp vid 10
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 0
        check_err $? "Packet locally received after installing a regular entry"
 
        # Add a host entry and expect the packet to be locally received.
        bridge mdb add dev br0 port br0 grp $grp temp vid 10
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 1
        check_err $? "Packet not locally received after adding a host entry"
 
        # Remove the host entry and expect the packet to not be locally
        # received.
        bridge mdb del dev br0 port br0 grp $grp vid 10
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 1
        check_err $? "Packet locally received after removing a host entry"
 
@@ -905,8 +906,8 @@ __fwd_test_host_ip()
 
 fwd_test_host_ip()
 {
-       __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4"
-       __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6"
+       __fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4"
+       __fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6"
 }
 
 fwd_test_host_l2()
@@ -966,6 +967,7 @@ fwd_test_host()
 __fwd_test_port_ip()
 {
        local grp=$1; shift
+       local dmac=$1; shift
        local valid_src=$1; shift
        local invalid_src=$1; shift
        local mode=$1; shift
@@ -999,43 +1001,43 @@ __fwd_test_port_ip()
                vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
                src_ip $invalid_src action drop
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 0
        check_err $? "Packet from valid source received on H2 before adding entry"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 0
        check_err $? "Packet from invalid source received on H2 before adding entry"
 
        bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
                filter_mode $filter_mode source_list $src_list
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 1
        check_err $? "Packet from valid source not received on H2 after adding entry"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 0
        check_err $? "Packet from invalid source received on H2 after adding entry"
 
        bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \
                filter_mode exclude
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 2
        check_err $? "Packet from valid source not received on H2 after allowing all sources"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 1
        check_err $? "Packet from invalid source not received on H2 after allowing all sources"
 
        bridge mdb del dev br0 port $swp2 grp $grp vid 10
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 2
        check_err $? "Packet from valid source received on H2 after deleting entry"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 1
        check_err $? "Packet from invalid source received on H2 after deleting entry"
 
@@ -1047,11 +1049,11 @@ __fwd_test_port_ip()
 
 fwd_test_port_ip()
 {
-       __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude"
-       __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+       __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude"
+       __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
                "exclude"
-       __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include"
-       __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+       __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include"
+       __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
                "include"
 }
 
@@ -1127,7 +1129,7 @@ ctrl_igmpv3_is_in_test()
                filter_mode include source_list 192.0.2.1
 
        # IS_IN ( 192.0.2.2 )
-       $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+       $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
                -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
        bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
@@ -1140,7 +1142,7 @@ ctrl_igmpv3_is_in_test()
                filter_mode include source_list 192.0.2.1
 
        # IS_IN ( 192.0.2.2 )
-       $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+       $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
                -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
        bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
@@ -1167,7 +1169,7 @@ ctrl_mldv2_is_in_test()
 
        # IS_IN ( 2001:db8:1::2 )
        local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
-       $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+       $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
                -t ip hop=1,next=0,p="$p" -q
 
        bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
@@ -1181,7 +1183,7 @@ ctrl_mldv2_is_in_test()
                filter_mode include source_list 2001:db8:1::1
 
        # IS_IN ( 2001:db8:1::2 )
-       $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+       $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
                -t ip hop=1,next=0,p="$p" -q
 
        bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
@@ -1206,6 +1208,11 @@ ctrl_test()
        ctrl_mldv2_is_in_test
 }
 
+if ! bridge mdb help 2>&1 | grep -q "replace"; then
+       echo "SKIP: iproute2 too old, missing bridge mdb replace support"
+       exit $ksft_skip
+fi
+
 trap cleanup EXIT
 
 setup_prepare
index ae255b6..3da9d93 100755 (executable)
@@ -252,7 +252,8 @@ ctl4_entries_add()
        local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
        local peer=$(locus_dev_peer $locus)
        local GRP=239.1.1.${grp}
-       $MZ $peer -c 1 -A 192.0.2.1 -B $GRP \
+       local dmac=01:00:5e:01:01:$(printf "%02x" $grp)
+       $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \
                -t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
        sleep 1
 
@@ -272,7 +273,8 @@ ctl4_entries_del()
 
        local peer=$(locus_dev_peer $locus)
        local GRP=239.1.1.${grp}
-       $MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+       local dmac=01:00:5e:00:00:02
+       $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \
                -t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
        sleep 1
        ! bridge mdb show dev br0 | grep -q $GRP
@@ -289,8 +291,10 @@ ctl6_entries_add()
        local peer=$(locus_dev_peer $locus)
        local SIP=fe80::1
        local GRP=ff0e::${grp}
+       local dmac=33:33:00:00:00:$(printf "%02x" $grp)
        local p=$(mldv2_is_in_get $SIP $GRP $IPs)
-       $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+       $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+               -t ip hop=1,next=0,p="$p" -q
        sleep 1
 
        local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
@@ -310,8 +314,10 @@ ctl6_entries_del()
        local peer=$(locus_dev_peer $locus)
        local SIP=fe80::1
        local GRP=ff0e::${grp}
+       local dmac=33:33:00:00:00:$(printf "%02x" $grp)
        local p=$(mldv1_done_get $SIP $GRP)
-       $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+       $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+               -t ip hop=1,next=0,p="$p" -q
        sleep 1
        ! bridge mdb show dev br0 | grep -q $GRP
 }
@@ -1328,6 +1334,11 @@ test_8021qvs()
        switch_destroy
 }
 
+if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
+       echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
+       exit $ksft_skip
+fi
+
 trap cleanup EXIT
 
 setup_prepare
index dbb9fcf..aa2eafb 100755 (executable)
@@ -286,6 +286,8 @@ different_speeds_autoneg_on()
        ethtool -s $h1 autoneg on
 }
 
+skip_on_veth
+
 trap cleanup EXIT
 
 setup_prepare
index c580ad6..39e736f 100755 (executable)
@@ -258,11 +258,6 @@ h2_destroy()
 
 setup_prepare()
 {
-       check_ethtool_mm_support
-       check_tc_fp_support
-       require_command lldptool
-       bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
-
        h1=${NETIFS[p1]}
        h2=${NETIFS[p2]}
 
@@ -278,6 +273,19 @@ cleanup()
        h1_destroy
 }
 
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+       ethtool --show-mm $netif &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: $netif does not support MAC Merge"
+               exit $ksft_skip
+       fi
+done
+
 trap cleanup EXIT
 
 setup_prepare
index eb9ec4a..7594bbb 100755 (executable)
@@ -99,6 +99,8 @@ test_stats_rx()
        test_stats g2a rx
 }
 
+skip_on_veth
+
 trap cleanup EXIT
 
 setup_prepare
index 9f5b3e2..49fa94b 100755 (executable)
@@ -14,6 +14,8 @@ ALL_TESTS="
 NUM_NETIFS=4
 source lib.sh
 
+require_command $TROUTE6
+
 h1_create()
 {
        simple_if_init $h1 2001:1:1::2/64
index 9ddb68d..f69015b 100755 (executable)
@@ -30,6 +30,7 @@ REQUIRE_MZ=${REQUIRE_MZ:=yes}
 REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
 STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
 TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
+TROUTE6=${TROUTE6:=traceroute6}
 
 relative_path="${BASH_SOURCE%/*}"
 if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -163,6 +164,17 @@ check_port_mab_support()
        fi
 }
 
+skip_on_veth()
+{
+       local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
+               jq -r '.[].linkinfo.info_kind')
+
+       if [[ $kind == veth ]]; then
+               echo "SKIP: Test cannot be run with veth pairs"
+               exit $ksft_skip
+       fi
+}
+
 if [[ "$(id -u)" -ne 0 ]]; then
        echo "SKIP: need root privileges"
        exit $ksft_skip
@@ -225,6 +237,11 @@ create_netif_veth()
        for ((i = 1; i <= NUM_NETIFS; ++i)); do
                local j=$((i+1))
 
+               if [ -z ${NETIFS[p$i]} ]; then
+                       echo "SKIP: Cannot create interface. Name not specified"
+                       exit $ksft_skip
+               fi
+
                ip link show dev ${NETIFS[p$i]} &> /dev/null
                if [[ $? -ne 0 ]]; then
                        ip link add ${NETIFS[p$i]} type veth \
index aff88f7..5ea9d63 100755 (executable)
@@ -72,7 +72,8 @@ test_span_gre_ttl()
 
        RET=0
 
-       mirror_install $swp1 ingress $tundev "matchall $tcflags"
+       mirror_install $swp1 ingress $tundev \
+               "prot ip flower $tcflags ip_prot icmp"
        tc filter add dev $h3 ingress pref 77 prot $prot \
                flower skip_hw ip_ttl 50 action pass
 
diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings
new file mode 100644 (file)
index 0000000..e7b9417
--- /dev/null
@@ -0,0 +1 @@
+timeout=0
index a96cff8..b0f5e55 100755 (executable)
@@ -9,6 +9,8 @@ NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
 
+require_command ncat
+
 tcflags="skip_hw"
 
 h1_create()
@@ -220,9 +222,9 @@ mirred_egress_to_ingress_tcp_test()
                ip_proto icmp \
                        action drop
 
-       ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2  &
+       ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
        local rpid=$!
-       ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
+       ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
        wait -n $rpid
        cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
        check_err $? "server output check failed"
index 683711f..b1daad1 100755 (executable)
@@ -52,8 +52,8 @@ match_dst_mac_test()
        tc_check_packets "dev $h2 ingress" 101 1
        check_fail $? "Matched on a wrong filter"
 
-       tc_check_packets "dev $h2 ingress" 102 1
-       check_err $? "Did not match on correct filter"
+       tc_check_packets "dev $h2 ingress" 102 0
+       check_fail $? "Did not match on correct filter"
 
        tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
        tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -78,8 +78,8 @@ match_src_mac_test()
        tc_check_packets "dev $h2 ingress" 101 1
        check_fail $? "Matched on a wrong filter"
 
-       tc_check_packets "dev $h2 ingress" 102 1
-       check_err $? "Did not match on correct filter"
+       tc_check_packets "dev $h2 ingress" 102 0
+       check_fail $? "Did not match on correct filter"
 
        tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
        tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
index e22c2d2..20a7cb7 100755 (executable)
@@ -127,6 +127,7 @@ test_l2_miss_multicast_common()
        local proto=$1; shift
        local sip=$1; shift
        local dip=$1; shift
+       local dmac=$1; shift
        local mode=$1; shift
        local name=$1; shift
 
@@ -142,7 +143,7 @@ test_l2_miss_multicast_common()
           action pass
 
        # Before adding MDB entry.
-       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
 
        tc_check_packets "dev $swp2 egress" 101 1
        check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
@@ -153,7 +154,7 @@ test_l2_miss_multicast_common()
        # Adding MDB entry.
        bridge mdb replace dev br1 port $swp2 grp $dip permanent
 
-       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
 
        tc_check_packets "dev $swp2 egress" 101 1
        check_err $? "Unregistered multicast filter was hit after adding MDB entry"
@@ -164,7 +165,7 @@ test_l2_miss_multicast_common()
        # Deleting MDB entry.
        bridge mdb del dev br1 port $swp2 grp $dip
 
-       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
 
        tc_check_packets "dev $swp2 egress" 101 2
        check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
@@ -183,10 +184,11 @@ test_l2_miss_multicast_ipv4()
        local proto="ipv4"
        local sip=192.0.2.1
        local dip=239.1.1.1
+       local dmac=01:00:5e:01:01:01
        local mode="-4"
        local name="IPv4"
 
-       test_l2_miss_multicast_common $proto $sip $dip $mode $name
+       test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
 }
 
 test_l2_miss_multicast_ipv6()
@@ -194,10 +196,11 @@ test_l2_miss_multicast_ipv6()
        local proto="ipv6"
        local sip=2001:db8:1::1
        local dip=ff0e::1
+       local dmac=33:33:00:00:00:01
        local mode="-6"
        local name="IPv6"
 
-       test_l2_miss_multicast_common $proto $sip $dip $mode $name
+       test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
 }
 
 test_l2_miss_multicast()
index 5ac184d..5a5dd90 100755 (executable)
@@ -104,11 +104,14 @@ tunnel_key_nofrag_test()
        local i
 
        tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \
-               flower ip_flags nofrag action drop
+               flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+               ip_flags nofrag action drop
        tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \
-               flower ip_flags firstfrag action drop
+               flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+               ip_flags firstfrag action drop
        tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \
-               flower ip_flags nofirstfrag action drop
+               flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+               ip_flags nofirstfrag action drop
 
        # test 'nofrag' set
        tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \
index 3c2096a..d01b73a 100755 (executable)
@@ -705,6 +705,7 @@ pm_nl_del_endpoint()
        local addr=$3
 
        if [ $ip_mptcp -eq 1 ]; then
+               [ $id -ne 0 ] && addr=''
                ip -n $ns mptcp endpoint delete id $id $addr
        else
                ip netns exec $ns ./pm_nl_ctl del $id $addr
@@ -795,10 +796,11 @@ pm_nl_check_endpoint()
        fi
 
        if [ $ip_mptcp -eq 1 ]; then
+               # get line and trim trailing whitespace
                line=$(ip -n $ns mptcp endpoint show $id)
+               line="${line% }"
                # the dump order is: address id flags port dev
-               expected_line="$addr"
-               [ -n "$addr" ] && expected_line="$expected_line $addr"
+               [ -n "$addr" ] && expected_line="$addr"
                expected_line="$expected_line $id"
                [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
                [ -n "$dev" ] && expected_line="$expected_line $dev"
index dfe3d28..f838dd3 100755 (executable)
@@ -361,6 +361,7 @@ err_buf=
 tcpdump_pids=
 nettest_pids=
 socat_pids=
+tmpoutfile=
 
 err() {
        err_buf="${err_buf}${1}
@@ -951,6 +952,7 @@ cleanup() {
        ip link del veth_A-R1                   2>/dev/null
        ovs-vsctl --if-exists del-port vxlan_a  2>/dev/null
        ovs-vsctl --if-exists del-br ovs_br0    2>/dev/null
+       rm -f "$tmpoutfile"
 }
 
 mtu() {
@@ -1328,6 +1330,39 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
        check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
        check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+
+       tmpoutfile=$(mktemp)
+
+       # Flush exceptions, then retry with TCP
+       run_cmd ${ns_a} ip route flush cached ${dst}
+       run_cmd ${ns_b} ip route flush cached ${dst}
+       run_cmd ${ns_c} ip route flush cached ${dst}
+
+       for target in "${ns_a}" "${ns_c}" ; do
+               if [ ${family} -eq 4 ]; then
+                       TCPDST=TCP:${dst}:50000
+               else
+                       TCPDST="TCP:[${dst}]:50000"
+               fi
+               ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile &
+
+               sleep 1
+
+               dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+
+               size=$(du -sb $tmpoutfile)
+               size=${size%%/tmp/*}
+
+               [ $size -ne 1048576 ] && err "File size $size does not match expected value in locally bridged vxlan test" && return 1
+       done
+
+       rm -f "$tmpoutfile"
+
+       # Check that exceptions were created
+       pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+       check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface"
+       pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+       check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on locally bridged ${type} interface"
 }
 
 test_pmtu_ipv4_br_vxlan4_exception() {
index b357ba2..7a957c7 100644 (file)
@@ -4,8 +4,10 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
 CLANG_FLAGS += -no-integrated-as
 endif
 
+top_srcdir = ../../../..
+
 CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \
-         $(CLANG_FLAGS)
+         $(CLANG_FLAGS) -I$(top_srcdir)/tools/include
 LDLIBS += -lpthread -ldl
 
 # Own dependencies because we only want to build against 1st prerequisite, but
index a723da2..96e812b 100644 (file)
@@ -31,6 +31,8 @@
 #include <sys/auxv.h>
 #include <linux/auxvec.h>
 
+#include <linux/compiler.h>
+
 #include "../kselftest.h"
 #include "rseq.h"
 
index b74916d..484d087 100644 (file)
@@ -62,9 +62,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
 config KVM_VFIO
        bool
 
-config HAVE_KVM_ARCH_TLB_FLUSH_ALL
-       bool
-
 config HAVE_KVM_INVALID_WAKEUPS
        bool
 
index 5bbb561..d63cf1c 100644 (file)
@@ -345,7 +345,6 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 }
 EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
 
-#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
        ++kvm->stat.generic.remote_tlb_flush_requests;
@@ -361,12 +360,38 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
         * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
         * barrier here.
         */
-       if (!kvm_arch_flush_remote_tlb(kvm)
+       if (!kvm_arch_flush_remote_tlbs(kvm)
            || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.generic.remote_tlb_flush;
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
-#endif
+
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
+{
+       if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
+               return;
+
+       /*
+        * Fall back to flushing all TLBs if the architecture's range-based
+        * TLB invalidation is unsupported or can't be performed for whatever
+        * reason.
+        */
+       kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+                                  const struct kvm_memory_slot *memslot)
+{
+       /*
+        * All current use cases for flushing the TLBs for a specific memslot
+        * are related to dirty logging, and many do the TLB flush out of
+        * mmu_lock. The interaction between the various operations on a memslot
+        * must be serialized by slots_lock to ensure the TLB flush from one
+        * operation is observed by any other operation on the same memslot.
+        */
+       lockdep_assert_held(&kvm->slots_lock);
+       kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
+}
 
 static void kvm_flush_shadow_all(struct kvm *kvm)
 {
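
kvm_flush_remote_tlbs_range() stays range-based only when the architecture hook reports success; any non-zero return falls through to the full kvm_flush_remote_tlbs() above. A hedged sketch of the fallback stub an architecture without range invalidation might provide (illustrative; the actual per-arch hooks and their guards differ):

	/* Sketch: no ranged TLB invalidation available, report failure so the
	 * common code falls back to a full remote TLB flush. */
	static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
							   gfn_t gfn, u64 nr_pages)
	{
		return -EOPNOTSUPP;
	}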
@@ -526,7 +551,7 @@ typedef void (*on_unlock_fn_t)(struct kvm *kvm);
 struct kvm_hva_range {
        unsigned long start;
        unsigned long end;
-       pte_t pte;
+       union kvm_mmu_notifier_arg arg;
        hva_handler_t handler;
        on_lock_fn_t on_lock;
        on_unlock_fn_t on_unlock;
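
The pte_t member is replaced by a small per-action union so notifier handlers that do not deal with a PTE no longer have to thread a dummy __pte(0) through, and future handlers can carry other argument types. A sketch of the assumed shape (the real definition lives in the KVM headers and may grow more members):

	union kvm_mmu_notifier_arg {
		pte_t pte;
	};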
@@ -547,6 +572,8 @@ static void kvm_null_fn(void)
 }
 #define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
 
+static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG;
+
 /* Iterate over each memslot intersecting [start, last] (inclusive) range */
 #define kvm_for_each_memslot_in_hva_range(node, slots, start, last)         \
        for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
@@ -591,7 +618,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
                         * bother making these conditional (to avoid writes on
                         * the second or later invocation of the handler).
                         */
-                       gfn_range.pte = range->pte;
+                       gfn_range.arg = range->arg;
                        gfn_range.may_block = range->may_block;
 
                        /*
@@ -632,14 +659,14 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
                                                unsigned long start,
                                                unsigned long end,
-                                               pte_t pte,
+                                               union kvm_mmu_notifier_arg arg,
                                                hva_handler_t handler)
 {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
        const struct kvm_hva_range range = {
                .start          = start,
                .end            = end,
-               .pte            = pte,
+               .arg            = arg,
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
                .on_unlock      = (void *)kvm_null_fn,
@@ -659,7 +686,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
        const struct kvm_hva_range range = {
                .start          = start,
                .end            = end,
-               .pte            = __pte(0),
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
                .on_unlock      = (void *)kvm_null_fn,
@@ -693,6 +719,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                        pte_t pte)
 {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
+       const union kvm_mmu_notifier_arg arg = { .pte = pte };
 
        trace_kvm_set_spte_hva(address);
 
@@ -708,7 +735,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
        if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
                return;
 
-       kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
+       kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
 }
 
 void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
@@ -747,7 +774,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
        const struct kvm_hva_range hva_range = {
                .start          = range->start,
                .end            = range->end,
-               .pte            = __pte(0),
                .handler        = kvm_unmap_gfn_range,
                .on_lock        = kvm_mmu_invalidate_begin,
                .on_unlock      = kvm_arch_guest_memory_reclaimed,
@@ -812,7 +838,6 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
        const struct kvm_hva_range hva_range = {
                .start          = range->start,
                .end            = range->end,
-               .pte            = __pte(0),
                .handler        = (void *)kvm_null_fn,
                .on_lock        = kvm_mmu_invalidate_end,
                .on_unlock      = (void *)kvm_null_fn,
@@ -845,7 +870,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 {
        trace_kvm_age_hva(start, end);
 
-       return kvm_handle_hva_range(mn, start, end, __pte(0), kvm_age_gfn);
+       return kvm_handle_hva_range(mn, start, end, KVM_MMU_NOTIFIER_NO_ARG,
+                                   kvm_age_gfn);
 }
 
 static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
@@ -2180,7 +2206,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
        }
 
        if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+               kvm_flush_remote_tlbs_memslot(kvm, memslot);
 
        if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
                return -EFAULT;
@@ -2297,7 +2323,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
        KVM_MMU_UNLOCK(kvm);
 
        if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+               kvm_flush_remote_tlbs_memslot(kvm, memslot);
 
        return 0;
 }