Merge tag 'thermal-6.6-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Sep 2023 20:24:00 +0000 (13:24 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Sep 2023 20:24:00 +0000 (13:24 -0700)
Pull more thermal control updates from Rafael Wysocki:
 "Eliminate an obsolete thermal zone registration function"

* tag 'thermal-6.6-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  thermal: core: Drop thermal_zone_device_register()
  thermal: Use thermal_tripless_zone_device_register()
  thermal: core: Add function for registering tripless thermal zones
  thermal: core: Clean up headers of thermal zone registration functions

1134 files changed:
.gitignore
Documentation/bpf/btf.rst
Documentation/bpf/index.rst
Documentation/bpf/linux-notes.rst [moved from Documentation/bpf/standardization/linux-notes.rst with 100% similarity]
Documentation/bpf/llvm_reloc.rst
Documentation/bpf/standardization/abi.rst [new file with mode: 0644]
Documentation/bpf/standardization/index.rst
Documentation/bpf/standardization/instruction-set.rst
Documentation/dev-tools/kasan.rst
Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml
Documentation/devicetree/bindings/i3c/i3c.yaml
Documentation/devicetree/bindings/input/azoteq,iqs7222.yaml
Documentation/devicetree/bindings/input/touchscreen/azoteq,iqs7211.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml
Documentation/devicetree/bindings/input/touchscreen/eeti,exc3000.yaml
Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml
Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt [deleted file]
Documentation/devicetree/bindings/interrupt-controller/st,stih407-irq-syscfg.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/media/i2c/ov5695.txt [deleted file]
Documentation/devicetree/bindings/media/i2c/ov7251.txt [deleted file]
Documentation/devicetree/bindings/media/i2c/ovti,ov5693.yaml
Documentation/devicetree/bindings/media/i2c/ovti,ov7251.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/media/rockchip-isp1.yaml
Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt [deleted file]
Documentation/devicetree/bindings/pwm/brcm,kona-pwm.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.yaml
Documentation/devicetree/bindings/rtc/intersil,isl12022.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/maxim,ds3231.txt [deleted file]
Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml
Documentation/devicetree/bindings/rtc/st,m48t86.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
Documentation/devicetree/bindings/sound/fsl,easrc.yaml
Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml
Documentation/devicetree/bindings/watchdog/marvell,cn10624-wdt.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
Documentation/devicetree/bindings/watchdog/ti,rti-wdt.yaml
Documentation/driver-api/libata.rst
Documentation/features/debug/KASAN/arch-support.txt
Documentation/features/debug/kcov/arch-support.txt
Documentation/features/debug/kgdb/arch-support.txt
Documentation/filesystems/ceph.rst
Documentation/filesystems/gfs2-glocks.rst
Documentation/filesystems/proc.rst
Documentation/gpu/amdgpu/driver-misc.rst
Documentation/kbuild/kconfig.rst
Documentation/kbuild/llvm.rst
Documentation/process/maintainer-netdev.rst
Documentation/translations/zh_CN/dev-tools/kasan.rst
Documentation/userspace-api/netlink/intro.rst
Documentation/virt/kvm/api.rst
MAINTAINERS
Makefile
arch/alpha/include/asm/Kbuild
arch/alpha/lib/callback_srm.S
arch/alpha/lib/clear_page.S
arch/alpha/lib/clear_user.S
arch/alpha/lib/copy_page.S
arch/alpha/lib/copy_user.S
arch/alpha/lib/csum_ipv6_magic.S
arch/alpha/lib/divide.S
arch/alpha/lib/ev6-clear_page.S
arch/alpha/lib/ev6-clear_user.S
arch/alpha/lib/ev6-copy_page.S
arch/alpha/lib/ev6-copy_user.S
arch/alpha/lib/ev6-csum_ipv6_magic.S
arch/alpha/lib/ev6-divide.S
arch/alpha/lib/ev6-memchr.S
arch/alpha/lib/ev6-memcpy.S
arch/alpha/lib/ev6-memset.S
arch/alpha/lib/ev67-strcat.S
arch/alpha/lib/ev67-strchr.S
arch/alpha/lib/ev67-strlen.S
arch/alpha/lib/ev67-strncat.S
arch/alpha/lib/ev67-strrchr.S
arch/alpha/lib/memchr.S
arch/alpha/lib/memmove.S
arch/alpha/lib/memset.S
arch/alpha/lib/strcat.S
arch/alpha/lib/strchr.S
arch/alpha/lib/strcpy.S
arch/alpha/lib/strlen.S
arch/alpha/lib/strncat.S
arch/alpha/lib/strncpy.S
arch/alpha/lib/strrchr.S
arch/alpha/lib/udiv-qrnnd.S
arch/arc/Kconfig
arch/arc/Makefile
arch/arc/include/asm/arcregs.h
arch/arc/include/asm/atomic-llsc.h
arch/arc/include/asm/atomic64-arcv2.h
arch/arc/include/asm/current.h
arch/arc/include/asm/dwarf.h
arch/arc/include/asm/entry-arcv2.h
arch/arc/include/asm/entry-compact.h
arch/arc/include/asm/entry.h
arch/arc/include/asm/irq.h
arch/arc/include/asm/mmu.h
arch/arc/include/asm/processor.h
arch/arc/include/asm/ptrace.h
arch/arc/include/asm/setup.h
arch/arc/include/asm/smp.h
arch/arc/include/asm/thread_info.h
arch/arc/include/asm/uaccess.h
arch/arc/kernel/Makefile
arch/arc/kernel/asm-offsets.c
arch/arc/kernel/ctx_sw.c [deleted file]
arch/arc/kernel/ctx_sw_asm.S
arch/arc/kernel/devtree.c
arch/arc/kernel/entry-arcv2.S
arch/arc/kernel/entry-compact.S
arch/arc/kernel/entry.S
arch/arc/kernel/intc-arcv2.c
arch/arc/kernel/kgdb.c
arch/arc/kernel/mcip.c
arch/arc/kernel/process.c
arch/arc/kernel/ptrace.c
arch/arc/kernel/setup.c
arch/arc/kernel/signal.c
arch/arc/kernel/smp.c
arch/arc/kernel/stacktrace.c
arch/arc/kernel/traps.c
arch/arc/kernel/troubleshoot.c
arch/arc/lib/memset-archs.S
arch/arc/mm/cache.c
arch/arc/mm/extable.c
arch/arc/mm/fault.c
arch/arc/mm/init.c
arch/arc/mm/tlb.c
arch/arc/plat-axs10x/axs10x.c
arch/arm/configs/dram_0x00000000.config
arch/arm/configs/dram_0xc0000000.config
arch/arm/configs/dram_0xd0000000.config
arch/arm/configs/lpae.config
arch/arm/include/asm/arm_pmuv3.h
arch/arm/include/asm/ide.h [deleted file]
arch/arm64/configs/virt.config
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/kvm_nested.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/idreg-override.c
arch/arm64/kvm/Kconfig
arch/arm64/kvm/arm.c
arch/arm64/kvm/emulate-nested.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/include/nvhe/mm.h
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/mm.c
arch/arm64/kvm/hyp/nvhe/setup.c
arch/arm64/kvm/hyp/nvhe/switch.c
arch/arm64/kvm/hyp/nvhe/tlb.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vhe/tlb.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/nested.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/pmu.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/trace_arm.h
arch/arm64/kvm/vgic/vgic.h
arch/arm64/lib/csum.c
arch/arm64/tools/cpucaps
arch/arm64/tools/sysreg
arch/ia64/include/asm/Kbuild
arch/ia64/kernel/entry.S
arch/ia64/kernel/esi_stub.S
arch/ia64/kernel/head.S
arch/ia64/kernel/ivt.S
arch/ia64/kernel/pal.S
arch/ia64/lib/clear_page.S
arch/ia64/lib/clear_user.S
arch/ia64/lib/copy_page.S
arch/ia64/lib/copy_page_mck.S
arch/ia64/lib/copy_user.S
arch/ia64/lib/flush.S
arch/ia64/lib/idiv32.S
arch/ia64/lib/idiv64.S
arch/ia64/lib/ip_fast_csum.S
arch/ia64/lib/memcpy.S
arch/ia64/lib/memcpy_mck.S
arch/ia64/lib/memset.S
arch/ia64/lib/strlen.S
arch/ia64/lib/strncpy_from_user.S
arch/ia64/lib/strnlen_user.S
arch/ia64/lib/xor.S
arch/loongarch/Kconfig
arch/loongarch/Makefile
arch/loongarch/configs/loongson3_defconfig
arch/loongarch/include/asm/asm-prototypes.h
arch/loongarch/include/asm/asmmacro.h
arch/loongarch/include/asm/kasan.h [new file with mode: 0644]
arch/loongarch/include/asm/kfence.h [new file with mode: 0644]
arch/loongarch/include/asm/kgdb.h [new file with mode: 0644]
arch/loongarch/include/asm/lbt.h [new file with mode: 0644]
arch/loongarch/include/asm/loongarch.h
arch/loongarch/include/asm/mmzone.h
arch/loongarch/include/asm/page.h
arch/loongarch/include/asm/pgalloc.h
arch/loongarch/include/asm/pgtable.h
arch/loongarch/include/asm/processor.h
arch/loongarch/include/asm/setup.h
arch/loongarch/include/asm/stackframe.h
arch/loongarch/include/asm/string.h
arch/loongarch/include/asm/switch_to.h
arch/loongarch/include/asm/thread_info.h
arch/loongarch/include/asm/xor.h [new file with mode: 0644]
arch/loongarch/include/asm/xor_simd.h [new file with mode: 0644]
arch/loongarch/include/uapi/asm/ptrace.h
arch/loongarch/include/uapi/asm/sigcontext.h
arch/loongarch/kernel/Makefile
arch/loongarch/kernel/asm-offsets.c
arch/loongarch/kernel/cpu-probe.c
arch/loongarch/kernel/entry.S
arch/loongarch/kernel/fpu.S
arch/loongarch/kernel/head.S
arch/loongarch/kernel/kfpu.c
arch/loongarch/kernel/kgdb.c [new file with mode: 0644]
arch/loongarch/kernel/lbt.S [new file with mode: 0644]
arch/loongarch/kernel/numa.c
arch/loongarch/kernel/process.c
arch/loongarch/kernel/ptrace.c
arch/loongarch/kernel/relocate.c
arch/loongarch/kernel/setup.c
arch/loongarch/kernel/signal.c
arch/loongarch/kernel/stacktrace.c
arch/loongarch/kernel/traps.c
arch/loongarch/lib/Makefile
arch/loongarch/lib/clear_user.S
arch/loongarch/lib/copy_user.S
arch/loongarch/lib/memcpy.S
arch/loongarch/lib/memmove.S
arch/loongarch/lib/memset.S
arch/loongarch/lib/xor_simd.c [new file with mode: 0644]
arch/loongarch/lib/xor_simd.h [new file with mode: 0644]
arch/loongarch/lib/xor_simd_glue.c [new file with mode: 0644]
arch/loongarch/lib/xor_template.c [new file with mode: 0644]
arch/loongarch/mm/Makefile
arch/loongarch/mm/cache.c
arch/loongarch/mm/fault.c
arch/loongarch/mm/init.c
arch/loongarch/mm/kasan_init.c [new file with mode: 0644]
arch/loongarch/mm/mmap.c
arch/loongarch/mm/pgtable.c
arch/loongarch/vdso/Makefile
arch/m68k/include/asm/ide.h [deleted file]
arch/microblaze/include/asm/page.h
arch/microblaze/include/asm/setup.h
arch/microblaze/kernel/reset.c
arch/microblaze/mm/init.c
arch/mips/Makefile
arch/mips/bmips/setup.c
arch/mips/cavium-octeon/flash_setup.c
arch/mips/cavium-octeon/octeon-memcpy.S
arch/mips/cavium-octeon/octeon-platform.c
arch/mips/configs/ip22_defconfig
arch/mips/configs/loongson3_defconfig
arch/mips/configs/malta_defconfig
arch/mips/configs/malta_kvm_defconfig
arch/mips/configs/maltaup_xpa_defconfig
arch/mips/configs/rm200_defconfig
arch/mips/include/asm/Kbuild
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/mach-loongson32/loongson1.h
arch/mips/include/asm/mach-loongson32/regs-clk.h [deleted file]
arch/mips/include/asm/mach-loongson32/regs-rtc.h [deleted file]
arch/mips/kernel/mcount.S
arch/mips/kernel/octeon_switch.S
arch/mips/kernel/r2300_fpu.S
arch/mips/kernel/r2300_switch.S
arch/mips/kernel/r4k_fpu.S
arch/mips/kvm/mips.c
arch/mips/kvm/mmu.c
arch/mips/lantiq/irq.c
arch/mips/lantiq/xway/dcdc.c
arch/mips/lantiq/xway/gptu.c
arch/mips/lantiq/xway/sysctrl.c
arch/mips/lantiq/xway/vmmc.c
arch/mips/lib/csum_partial.S
arch/mips/lib/memcpy.S
arch/mips/lib/memset.S
arch/mips/lib/strncpy_user.S
arch/mips/lib/strnlen_user.S
arch/mips/loongson32/common/platform.c
arch/mips/loongson64/smp.c
arch/mips/mm/page-funcs.S
arch/mips/mm/tlb-funcs.S
arch/mips/pci/pci-lantiq.c
arch/mips/pci/pci-rt2880.c
arch/mips/pic32/pic32mzda/config.c
arch/mips/ralink/ill_acc.c
arch/mips/ralink/irq.c
arch/mips/ralink/of.c
arch/mips/ralink/prom.c
arch/mips/txx9/generic/pci.c
arch/mips/vdso/vdso.lds.S
arch/openrisc/include/asm/bug.h [new file with mode: 0644]
arch/openrisc/include/asm/page.h
arch/openrisc/include/asm/processor.h
arch/openrisc/kernel/process.c
arch/openrisc/kernel/ptrace.c
arch/openrisc/kernel/signal.c
arch/openrisc/kernel/smp.c
arch/openrisc/kernel/time.c
arch/openrisc/kernel/traps.c
arch/openrisc/mm/fault.c
arch/openrisc/mm/init.c
arch/openrisc/mm/ioremap.c
arch/openrisc/mm/tlb.c
arch/parisc/include/asm/ide.h [deleted file]
arch/powerpc/configs/disable-werror.config
arch/powerpc/configs/security.config
arch/powerpc/include/asm/ide.h [deleted file]
arch/riscv/configs/32-bit.config
arch/riscv/configs/64-bit.config
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/kvm_host.h
arch/riscv/include/asm/kvm_vcpu_vector.h
arch/riscv/include/uapi/asm/kvm.h
arch/riscv/kvm/Makefile
arch/riscv/kvm/aia.c
arch/riscv/kvm/mmu.c
arch/riscv/kvm/vcpu.c
arch/riscv/kvm/vcpu_fp.c
arch/riscv/kvm/vcpu_onereg.c [new file with mode: 0644]
arch/riscv/kvm/vcpu_sbi.c
arch/riscv/kvm/vcpu_timer.c
arch/riscv/kvm/vcpu_vector.c
arch/s390/boot/ipl_parm.c
arch/s390/boot/startup.c
arch/s390/boot/vmem.c
arch/s390/configs/btf.config
arch/s390/configs/kasan.config
arch/s390/include/asm/airq.h
arch/s390/include/asm/dma.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/sections.h
arch/s390/include/asm/set_memory.h
arch/s390/include/asm/setup.h
arch/s390/include/asm/uv.h
arch/s390/include/uapi/asm/kvm.h
arch/s390/kernel/early.c
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/setup.c
arch/s390/kernel/uv.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/pv.c
arch/s390/mm/dump_pagetables.c
arch/s390/mm/fault.c
arch/s390/mm/init.c
arch/s390/mm/pageattr.c
arch/s390/mm/vmem.c
arch/s390/net/bpf_jit_comp.c
arch/sparc/include/asm/Kbuild
arch/sparc/include/asm/ide.h [deleted file]
arch/sparc/kernel/entry.S
arch/sparc/kernel/head_32.S
arch/sparc/kernel/head_64.S
arch/sparc/lib/U1memcpy.S
arch/sparc/lib/VISsave.S
arch/sparc/lib/ashldi3.S
arch/sparc/lib/ashrdi3.S
arch/sparc/lib/atomic_64.S
arch/sparc/lib/bitops.S
arch/sparc/lib/blockops.S
arch/sparc/lib/bzero.S
arch/sparc/lib/checksum_32.S
arch/sparc/lib/checksum_64.S
arch/sparc/lib/clear_page.S
arch/sparc/lib/copy_in_user.S
arch/sparc/lib/copy_page.S
arch/sparc/lib/copy_user.S
arch/sparc/lib/csum_copy.S
arch/sparc/lib/divdi3.S
arch/sparc/lib/ffs.S
arch/sparc/lib/fls.S
arch/sparc/lib/fls64.S
arch/sparc/lib/hweight.S
arch/sparc/lib/ipcsum.S
arch/sparc/lib/locks.S
arch/sparc/lib/lshrdi3.S
arch/sparc/lib/mcount.S
arch/sparc/lib/memcmp.S
arch/sparc/lib/memcpy.S
arch/sparc/lib/memmove.S
arch/sparc/lib/memscan_32.S
arch/sparc/lib/memscan_64.S
arch/sparc/lib/memset.S
arch/sparc/lib/muldi3.S
arch/sparc/lib/multi3.S
arch/sparc/lib/strlen.S
arch/sparc/lib/strncmp_32.S
arch/sparc/lib/strncmp_64.S
arch/sparc/lib/xor.S
arch/sparc/mm/tlb.c
arch/x86/Makefile
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/kexec.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_page_track.h
arch/x86/include/asm/reboot.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/virtext.h [deleted file]
arch/x86/include/asm/vmx.h
arch/x86/kernel/crash.c
arch/x86/kernel/reboot.c
arch/x86/kvm/Kconfig
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/emulate.c
arch/x86/kvm/governed_features.h [new file with mode: 0644]
arch/x86/kvm/hyperv.c
arch/x86/kvm/kvm_emulate.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/mmu/page_track.c
arch/x86/kvm/mmu/page_track.h [new file with mode: 0644]
arch/x86/kvm/mmu/paging_tmpl.h
arch/x86/kvm/mmu/spte.c
arch/x86/kvm/mmu/spte.h
arch/x86/kvm/mmu/tdp_iter.c
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/pmu.c
arch/x86/kvm/reverse_cpuid.h
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/hyperv.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/nested.h
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/xtensa/Kconfig
arch/xtensa/include/asm/core.h
arch/xtensa/include/asm/mtd-xip.h [new file with mode: 0644]
arch/xtensa/include/asm/sections.h
arch/xtensa/kernel/perf_event.c
arch/xtensa/kernel/setup.c
arch/xtensa/kernel/vmlinux.lds.S
drivers/accel/ivpu/ivpu_jsm_msg.c
drivers/ata/ahci.c
drivers/ata/ahci_ceva.c
drivers/ata/ahci_dwc.c
drivers/ata/ahci_mtk.c
drivers/ata/ahci_mvebu.c
drivers/ata/ahci_octeon.c
drivers/ata/ahci_qoriq.c
drivers/ata/ahci_seattle.c
drivers/ata/ahci_sunxi.c
drivers/ata/ahci_tegra.c
drivers/ata/ahci_xgene.c
drivers/ata/libahci.c
drivers/ata/libahci_platform.c
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/libata-sata.c
drivers/ata/libata-scsi.c
drivers/ata/libata-sff.c
drivers/ata/libata.h
drivers/ata/pata_arasan_cf.c
drivers/ata/pata_buddha.c
drivers/ata/pata_ep93xx.c
drivers/ata/pata_falcon.c
drivers/ata/pata_ftide010.c
drivers/ata/pata_gayle.c
drivers/ata/pata_imx.c
drivers/ata/pata_ixp4xx_cf.c
drivers/ata/pata_mpc52xx.c
drivers/ata/pata_pxa.c
drivers/ata/pata_rb532_cf.c
drivers/ata/pata_sl82c105.c
drivers/ata/sata_dwc_460ex.c
drivers/ata/sata_fsl.c
drivers/ata/sata_gemini.c
drivers/ata/sata_highbank.c
drivers/ata/sata_inic162x.c
drivers/ata/sata_mv.c
drivers/ata/sata_nv.c
drivers/ata/sata_rcar.c
drivers/ata/sata_sil24.c
drivers/ata/sata_sx4.c
drivers/block/rbd.c
drivers/char/tpm/tpm_crb.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/pcc-cpufreq.c
drivers/gpio/gpio-zynq.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
drivers/gpu/drm/amd/display/dc/Makefile
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
drivers/gpu/drm/amd/display/modules/freesync/freesync.c
drivers/gpu/drm/amd/include/amd_shared.h
drivers/gpu/drm/amd/include/atomfirmware.h
drivers/gpu/drm/amd/include/discovery.h
drivers/gpu/drm/amd/pm/amdgpu_pm.c
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gvt/gtt.c
drivers/gpu/drm/i915/gvt/gtt.h
drivers/gpu/drm/i915/gvt/gvt.h
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/gpu/drm/i915/gvt/page_track.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/nouveau/dispnv04/crtc.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_chan.c
drivers/gpu/drm/nouveau/nouveau_dmem.c
drivers/gpu/drm/nouveau/nouveau_exec.c
drivers/gpu/drm/nouveau/nouveau_fence.c
drivers/gpu/drm/nouveau/nouveau_fence.h
drivers/gpu/drm/nouveau/nouveau_gem.c
drivers/i3c/master.c
drivers/i3c/master/ast2600-i3c-master.c
drivers/i3c/master/i3c-master-cdns.c
drivers/i3c/master/mipi-i3c-hci/cmd_v1.c
drivers/i3c/master/svc-i3c-master.c
drivers/input/gameport/Kconfig
drivers/input/gameport/gameport.c
drivers/input/joystick/xpad.c
drivers/input/keyboard/adp5588-keys.c
drivers/input/keyboard/amikbd.c
drivers/input/keyboard/bcm-keypad.c
drivers/input/keyboard/gpio_keys.c
drivers/input/keyboard/gpio_keys_polled.c
drivers/input/keyboard/lm8323.c
drivers/input/keyboard/lm8333.c
drivers/input/keyboard/lpc32xx-keys.c
drivers/input/keyboard/mcs_touchkey.c
drivers/input/keyboard/nomadik-ske-keypad.c
drivers/input/keyboard/nspire-keypad.c
drivers/input/keyboard/omap4-keypad.c
drivers/input/keyboard/opencores-kbd.c
drivers/input/keyboard/pinephone-keyboard.c
drivers/input/keyboard/pxa27x_keypad.c
drivers/input/keyboard/qt1070.c
drivers/input/keyboard/qt2160.c
drivers/input/keyboard/sun4i-lradc-keys.c
drivers/input/keyboard/tca6416-keypad.c
drivers/input/keyboard/tegra-kbc.c
drivers/input/keyboard/tm2-touchkey.c
drivers/input/misc/Kconfig
drivers/input/misc/cpcap-pwrbutton.c
drivers/input/misc/da9063_onkey.c
drivers/input/misc/gpio-vibra.c
drivers/input/misc/iqs269a.c
drivers/input/misc/iqs626a.c
drivers/input/misc/iqs7222.c
drivers/input/misc/mma8450.c
drivers/input/misc/pm8941-pwrkey.c
drivers/input/misc/pm8xxx-vibrator.c
drivers/input/misc/pmic8xxx-pwrkey.c
drivers/input/misc/pwm-beeper.c
drivers/input/misc/pwm-vibra.c
drivers/input/misc/rotary_encoder.c
drivers/input/misc/sparcspkr.c
drivers/input/mouse/elan_i2c_core.c
drivers/input/mouse/psmouse-smbus.c
drivers/input/serio/apbps2.c
drivers/input/serio/i8042-acpipnpio.h
drivers/input/serio/i8042-sparcio.h
drivers/input/serio/rpckbd.c
drivers/input/serio/xilinx_ps2.c
drivers/input/touchscreen/Kconfig
drivers/input/touchscreen/Makefile
drivers/input/touchscreen/bu21013_ts.c
drivers/input/touchscreen/bu21029_ts.c
drivers/input/touchscreen/chipone_icn8318.c
drivers/input/touchscreen/cy8ctma140.c
drivers/input/touchscreen/cyttsp5.c
drivers/input/touchscreen/edt-ft5x06.c
drivers/input/touchscreen/ektf2127.c
drivers/input/touchscreen/elants_i2c.c
drivers/input/touchscreen/exc3000.c
drivers/input/touchscreen/goodix.c
drivers/input/touchscreen/ili210x.c
drivers/input/touchscreen/iqs5xx.c
drivers/input/touchscreen/iqs7211.c [new file with mode: 0644]
drivers/input/touchscreen/lpc32xx_ts.c
drivers/input/touchscreen/melfas_mip4.c
drivers/input/touchscreen/mms114.c
drivers/input/touchscreen/novatek-nvt-ts.c
drivers/input/touchscreen/pixcir_i2c_ts.c
drivers/input/touchscreen/raydium_i2c_ts.c
drivers/input/touchscreen/resistive-adc-touch.c
drivers/input/touchscreen/silead.c
drivers/input/touchscreen/sis_i2c.c
drivers/input/touchscreen/surface3_spi.c
drivers/input/touchscreen/sx8654.c
drivers/input/touchscreen/ti_am335x_tsc.c
drivers/mailbox/arm_mhu.c
drivers/mailbox/arm_mhu_db.c
drivers/mailbox/bcm-flexrm-mailbox.c
drivers/mailbox/bcm-pdc-mailbox.c
drivers/mailbox/hi3660-mailbox.c
drivers/mailbox/hi6220-mailbox.c
drivers/mailbox/imx-mailbox.c
drivers/mailbox/mailbox-mpfs.c
drivers/mailbox/mailbox-test.c
drivers/mailbox/mailbox.c
drivers/mailbox/mtk-adsp-mailbox.c
drivers/mailbox/mtk-cmdq-mailbox.c
drivers/mailbox/omap-mailbox.c
drivers/mailbox/platform_mhu.c
drivers/mailbox/qcom-ipcc.c
drivers/mailbox/rockchip-mailbox.c
drivers/mailbox/sprd-mailbox.c
drivers/mailbox/stm32-ipcc.c
drivers/mailbox/tegra-hsp.c
drivers/mailbox/ti-msgmgr.c
drivers/mailbox/zynqmp-ipi-mailbox.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/sja1105/sja1105.h
drivers/net/dsa/sja1105/sja1105_main.c
drivers/net/dsa/sja1105/sja1105_spi.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/google/gve/gve_rx_dqo.c
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igbvf/igbvf.h
drivers/net/ethernet/intel/igc/igc.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/sfc/rx.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/macsec.c
drivers/net/phy/micrel.c
drivers/net/veth.c
drivers/nfc/nxp-nci/i2c.c
drivers/perf/arm_pmuv3.c
drivers/perf/cxl_pmu.c
drivers/powercap/intel_rapl_common.c
drivers/pwm/core.c
drivers/pwm/pwm-apple.c
drivers/pwm/pwm-atmel-hlcdc.c
drivers/pwm/pwm-atmel-tcb.c
drivers/pwm/pwm-atmel.c
drivers/pwm/pwm-bcm-kona.c
drivers/pwm/pwm-berlin.c
drivers/pwm/pwm-crc.c
drivers/pwm/pwm-cros-ec.c
drivers/pwm/pwm-fsl-ftm.c
drivers/pwm/pwm-hibvt.c
drivers/pwm/pwm-imx1.c
drivers/pwm/pwm-jz4740.c
drivers/pwm/pwm-lp3943.c
drivers/pwm/pwm-lpc18xx-sct.c
drivers/pwm/pwm-lpc32xx.c
drivers/pwm/pwm-mediatek.c
drivers/pwm/pwm-meson.c
drivers/pwm/pwm-microchip-core.c
drivers/pwm/pwm-mtk-disp.c
drivers/pwm/pwm-ntxec.c
drivers/pwm/pwm-pxa.c
drivers/pwm/pwm-rockchip.c
drivers/pwm/pwm-rz-mtu3.c
drivers/pwm/pwm-sifive.c
drivers/pwm/pwm-sl28cpld.c
drivers/pwm/pwm-sprd.c
drivers/pwm/pwm-stm32.c
drivers/pwm/pwm-stmpe.c
drivers/pwm/pwm-sun4i.c
drivers/pwm/pwm-sunplus.c
drivers/pwm/pwm-tegra.c
drivers/pwm/pwm-tiecap.c
drivers/pwm/pwm-tiehrpwm.c
drivers/pwm/pwm-visconti.c
drivers/pwm/pwm-vt8500.c
drivers/regulator/tps6287x-regulator.c
drivers/regulator/tps6594-regulator.c
drivers/rtc/Kconfig
drivers/rtc/interface.c
drivers/rtc/rtc-abx80x.c
drivers/rtc/rtc-armada38x.c
drivers/rtc/rtc-aspeed.c
drivers/rtc/rtc-at91rm9200.c
drivers/rtc/rtc-at91sam9.c
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-cros-ec.c
drivers/rtc/rtc-da9063.c
drivers/rtc/rtc-ds1305.c
drivers/rtc/rtc-ds1307.c
drivers/rtc/rtc-ds1742.c
drivers/rtc/rtc-ds2404.c
drivers/rtc/rtc-fsl-ftm-alarm.c
drivers/rtc/rtc-isl12022.c
drivers/rtc/rtc-isl12026.c
drivers/rtc/rtc-isl1208.c
drivers/rtc/rtc-jz4740.c
drivers/rtc/rtc-lpc24xx.c
drivers/rtc/rtc-m41t80.c
drivers/rtc/rtc-m48t86.c
drivers/rtc/rtc-mpc5121.c
drivers/rtc/rtc-mt6397.c
drivers/rtc/rtc-mt7622.c
drivers/rtc/rtc-mxc.c
drivers/rtc/rtc-nct3018y.c
drivers/rtc/rtc-omap.c
drivers/rtc/rtc-pcf2127.c
drivers/rtc/rtc-pcf85063.c
drivers/rtc/rtc-pcf85363.c
drivers/rtc/rtc-pxa.c
drivers/rtc/rtc-rs5c372.c
drivers/rtc/rtc-rv3028.c
drivers/rtc/rtc-rv3032.c
drivers/rtc/rtc-rv8803.c
drivers/rtc/rtc-rx6110.c
drivers/rtc/rtc-rx8581.c
drivers/rtc/rtc-rzn1.c
drivers/rtc/rtc-s3c.c
drivers/rtc/rtc-stm32.c
drivers/rtc/rtc-stmp3xxx.c
drivers/rtc/rtc-sun6i.c
drivers/rtc/rtc-sunplus.c
drivers/rtc/rtc-sunxi.c
drivers/rtc/rtc-ti-k3.c
drivers/rtc/rtc-tps6586x.c
drivers/rtc/rtc-tps65910.c
drivers/rtc/rtc-twl.c
drivers/rtc/rtc-wm8350.c
drivers/s390/block/dcssblk.c
drivers/s390/char/monreader.c
drivers/s390/cio/airq.c
drivers/s390/crypto/zcrypt_api.c
drivers/s390/virtio/virtio_ccw.c
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/libsas/sas_ata.c
drivers/scsi/libsas/sas_discover.c
drivers/spi/spi-sun6i.c
drivers/staging/greybus/pwm.c
drivers/usb/dwc3/dwc3-octeon.c
drivers/video/backlight/gpio_backlight.c
drivers/video/backlight/led_bl.c
drivers/video/backlight/lp855x_bl.c
drivers/video/backlight/qcom-wled.c
drivers/watchdog/Kconfig
drivers/watchdog/Makefile
drivers/watchdog/armada_37xx_wdt.c
drivers/watchdog/at91rm9200_wdt.c
drivers/watchdog/cpwd.c
drivers/watchdog/ftwdt010_wdt.c
drivers/watchdog/gef_wdt.c
drivers/watchdog/imx2_wdt.c
drivers/watchdog/imx7ulp_wdt.c
drivers/watchdog/intel-mid_wdt.c
drivers/watchdog/lantiq_wdt.c
drivers/watchdog/loongson1_wdt.c
drivers/watchdog/marvell_gti_wdt.c [new file with mode: 0644]
drivers/watchdog/menz69_wdt.c
drivers/watchdog/meson_gxbb_wdt.c
drivers/watchdog/meson_wdt.c
drivers/watchdog/mpc8xxx_wdt.c
drivers/watchdog/mtk_wdt.c
drivers/watchdog/of_xilinx_wdt.c
drivers/watchdog/pic32-dmt.c
drivers/watchdog/pic32-wdt.c
drivers/watchdog/pika_wdt.c
drivers/watchdog/pm8916_wdt.c
drivers/watchdog/qcom-wdt.c
drivers/watchdog/rave-sp-wdt.c
drivers/watchdog/riowd.c
drivers/watchdog/rti_wdt.c
drivers/watchdog/rza_wdt.c
drivers/watchdog/rzg2l_wdt.c
drivers/watchdog/s3c2410_wdt.c
drivers/watchdog/sama5d4_wdt.c
drivers/watchdog/sbsa_gwdt.c
drivers/watchdog/starfive-wdt.c
drivers/watchdog/stm32_iwdg.c
drivers/watchdog/sunxi_wdt.c
drivers/watchdog/watchdog_core.c
drivers/watchdog/xilinx_wwdt.c
fs/ceph/Makefile
fs/ceph/acl.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/crypto.c [new file with mode: 0644]
fs/ceph/crypto.h [new file with mode: 0644]
fs/ceph/dir.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/ioctl.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/quota.c
fs/ceph/snap.c
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/fuse/dir.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/fuse/readdir.c
fs/gfs2/aops.c
fs/gfs2/bmap.c
fs/gfs2/glock.c
fs/gfs2/glock.h
fs/gfs2/glops.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/lock_dlm.c
fs/gfs2/log.c
fs/gfs2/lops.c
fs/gfs2/main.c
fs/gfs2/ops_fstype.c
fs/gfs2/quota.c
fs/gfs2/recovery.c
fs/gfs2/recovery.h
fs/gfs2/super.c
fs/gfs2/super.h
fs/gfs2/sys.c
fs/gfs2/util.c
fs/ntfs3/super.c
fs/proc/task_mmu.c
fs/stat.c
include/asm-generic/ide_iops.h [deleted file]
include/kvm/arm_pmu.h
include/linux/audit.h
include/linux/bpf.h
include/linux/ceph/ceph_fs.h
include/linux/ceph/messenger.h
include/linux/ceph/osd_client.h
include/linux/ceph/rados.h
include/linux/cpufreq.h
include/linux/export.h
include/linux/gameport.h
include/linux/ipv6.h
include/linux/kasan.h
include/linux/kvm_host.h
include/linux/libata.h
include/linux/micrel_phy.h
include/linux/of.h
include/linux/phylink.h
include/linux/platform_data/rtc-ds2404.h [deleted file]
include/linux/pwm.h
include/linux/raid/pq.h
include/linux/rmap.h
include/linux/rtc.h
include/linux/tca6416_keypad.h
include/net/ip.h
include/net/ip6_fib.h
include/net/ip_fib.h
include/net/ip_tunnels.h
include/net/scm.h
include/net/sock.h
include/sound/dmaengine_pcm.h
include/sound/soc-component.h
include/uapi/linux/fuse.h
include/uapi/linux/netfilter/nf_tables.h
kernel/auditsc.c
kernel/bpf/bpf_local_storage.c
kernel/bpf/syscall.c
kernel/bpf/trampoline.c
kernel/configs/debug.config
kernel/configs/kvm_guest.config
kernel/configs/nopm.config
kernel/configs/rust.config
kernel/configs/x86_debug.config
kernel/configs/xen.config
kernel/printk/printk.c
lib/raid6/Makefile
lib/raid6/algos.c
lib/raid6/loongarch.h [new file with mode: 0644]
lib/raid6/loongarch_simd.c [new file with mode: 0644]
lib/raid6/recov_loongarch_simd.c [new file with mode: 0644]
lib/raid6/test/Makefile
mm/filemap.c
mm/kasan/init.c
mm/kasan/kasan.h
mm/kfence/core.c
mm/kmemleak.c
mm/ksm.c
mm/memcontrol.c
mm/memfd.c
mm/memory-failure.c
mm/page_alloc.c
mm/util.c
mm/vmalloc.c
net/bpf/test_run.c
net/can/j1939/socket.c
net/ceph/messenger.c
net/ceph/messenger_v1.c
net/ceph/messenger_v2.c
net/ceph/osd_client.c
net/core/flow_dissector.c
net/core/skbuff.c
net/core/skmsg.c
net/core/sock.c
net/core/sock_map.c
net/handshake/netlink.c
net/ipv4/fib_semantics.c
net/ipv4/fib_trie.c
net/ipv4/igmp.c
net/ipv4/ip_forward.c
net/ipv4/ip_input.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/ipmr.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_output.c
net/ipv4/udp.c
net/ipv6/addrconf.c
net/ipv6/ip6_input.c
net/ipv6/ip6_output.c
net/ipv6/ip6mr.c
net/ipv6/ping.c
net/ipv6/raw.c
net/ipv6/route.c
net/ipv6/udp.c
net/kcm/kcmsock.c
net/mptcp/protocol.c
net/netfilter/ipset/ip_set_hash_netportnet.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_osf.c
net/netfilter/nft_exthdr.c
net/netfilter/nft_set_rbtree.c
net/netfilter/xt_sctp.c
net/netfilter/xt_u32.c
net/sched/sch_fq_pie.c
net/sched/sch_plug.c
net/sched/sch_qfq.c
net/sctp/proc.c
net/sctp/socket.c
net/socket.c
net/unix/af_unix.c
net/unix/scm.c
net/xdp/xsk.c
net/xdp/xsk_diag.c
scripts/Makefile.extrawarn
scripts/Makefile.modinst
scripts/Makefile.modpost
scripts/Makefile.package
scripts/bpf_doc.py
scripts/depmod.sh
scripts/dummy-tools/gcc
scripts/kconfig/Makefile
scripts/kconfig/confdata.c
scripts/kconfig/expr.h
scripts/kconfig/lkc.h
scripts/kconfig/lxdialog/dialog.h
scripts/kconfig/lxdialog/textbox.c
scripts/kconfig/mconf.c
scripts/kconfig/menu.c
scripts/kconfig/nconf.c
scripts/kconfig/nconf.gui.c
scripts/kconfig/nconf.h
scripts/kconfig/preprocess.c
scripts/kconfig/qconf-cfg.sh
scripts/kconfig/qconf.cc
scripts/mod/modpost.c
scripts/mod/modpost.h
scripts/package/builddeb
scripts/package/debian/rules [new file with mode: 0755]
scripts/package/install-extmod-build [new file with mode: 0755]
scripts/package/kernel.spec [new file with mode: 0644]
scripts/package/mkdebian
scripts/package/mkspec
scripts/remove-stale-files
scripts/setlocalversion
security/landlock/ruleset.h
sound/core/pcm_lib.c
sound/core/seq/seq_memory.c
sound/isa/sb/emu8000_pcm.c
sound/pci/hda/patch_cs8409.c
sound/pci/hda/patch_cs8409.h
sound/pci/hda/patch_realtek.c
sound/pci/hda/tas2781_hda_i2c.c
sound/soc/amd/yc/acp6x-mach.c
sound/soc/atmel/mchp-pdmc.c
sound/soc/codecs/Kconfig
sound/soc/codecs/Makefile
sound/soc/codecs/cs35l45.c
sound/soc/codecs/cs35l56-shared.c
sound/soc/codecs/cs42l43.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/wcd-clsh-v2.c
sound/soc/intel/avs/pcm.c
sound/soc/soc-component.c
sound/soc/soc-generic-dmaengine-pcm.c
sound/soc/stm/stm32_sai_sub.c
sound/usb/midi2.c
tools/arch/x86/include/asm/cpufeatures.h
tools/bpf/bpftool/link.c
tools/mm/Makefile
tools/power/cpupower/Makefile
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
tools/testing/selftests/bpf/prog_tests/d_path.c
tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c [new file with mode: 0644]
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
tools/testing/selftests/kvm/aarch64/arch_timer.c
tools/testing/selftests/kvm/aarch64/debug-exceptions.c
tools/testing/selftests/kvm/aarch64/get-reg-list.c
tools/testing/selftests/kvm/aarch64/hypercalls.c
tools/testing/selftests/kvm/aarch64/page_fault_test.c
tools/testing/selftests/kvm/aarch64/vgic_irq.c
tools/testing/selftests/kvm/get-reg-list.c [new file with mode: 0644]
tools/testing/selftests/kvm/guest_print_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/include/aarch64/arch_timer.h
tools/testing/selftests/kvm/include/aarch64/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/kvm_util_base.h
tools/testing/selftests/kvm/include/riscv/processor.h
tools/testing/selftests/kvm/include/riscv/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390x/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/test_util.h
tools/testing/selftests/kvm/include/ucall_common.h
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/include/x86_64/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/kvm_page_table_test.c
tools/testing/selftests/kvm/lib/aarch64/ucall.c
tools/testing/selftests/kvm/lib/guest_sprintf.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/riscv/ucall.c
tools/testing/selftests/kvm/lib/s390x/ucall.c
tools/testing/selftests/kvm/lib/sparsebit.c
tools/testing/selftests/kvm/lib/string_override.c
tools/testing/selftests/kvm/lib/test_util.c
tools/testing/selftests/kvm/lib/ucall_common.c
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/lib/x86_64/ucall.c
tools/testing/selftests/kvm/max_guest_memory_test.c
tools/testing/selftests/kvm/memslot_perf_test.c
tools/testing/selftests/kvm/riscv/get-reg-list.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390x/cmma_test.c
tools/testing/selftests/kvm/s390x/debug_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390x/memop.c
tools/testing/selftests/kvm/s390x/tprot.c
tools/testing/selftests/kvm/set_memory_region_test.c
tools/testing/selftests/kvm/steal_time.c
tools/testing/selftests/kvm/x86_64/cpuid_test.c
tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
tools/testing/selftests/kvm/x86_64/hyperv_features.c
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
tools/testing/selftests/kvm/x86_64/sync_regs_test.c
tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
tools/testing/selftests/kvm/x86_64/userspace_io_test.c
tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
tools/testing/selftests/kvm/x86_64/xapic_state_test.c
tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
tools/testing/selftests/landlock/fs_test.c
tools/testing/selftests/net/fib_tests.sh
virt/kvm/Kconfig
virt/kvm/kvm_main.c

index 9fd4c95..0bbae16 100644 (file)
@@ -74,7 +74,7 @@ modules.order
 #
 # RPM spec file (make rpm-pkg)
 #
-/*.spec
+/kernel.spec
 /rpmbuild/
 
 #
index f32db1f..e43c2fd 100644 (file)
@@ -726,8 +726,8 @@ same as the one describe in :ref:`BTF_Type_String`.
 4.2 .BTF.ext section
 --------------------
 
-The .BTF.ext section encodes func_info and line_info which needs loader
-manipulation before loading into the kernel.
+The .BTF.ext section encodes func_info, line_info and CO-RE relocations
+which need loader manipulation before loading into the kernel.
 
 The specification for .BTF.ext section is defined at ``tools/lib/bpf/btf.h``
 and ``tools/lib/bpf/btf.c``.
@@ -745,15 +745,20 @@ The current header of .BTF.ext section::
         __u32   func_info_len;
         __u32   line_info_off;
         __u32   line_info_len;
+
+        /* optional part of .BTF.ext header */
+        __u32   core_relo_off;
+        __u32   core_relo_len;
     };
 
 It is very similar to .BTF section. Instead of type/string section, it
-contains func_info and line_info section. See :ref:`BPF_Prog_Load` for details
-about func_info and line_info record format.
+contains func_info, line_info and core_relo sub-sections.
+See :ref:`BPF_Prog_Load` for details about func_info and line_info
+record format.
 
 The func_info is organized as below.::
 
-     func_info_rec_size
+     func_info_rec_size              /* __u32 value */
      btf_ext_info_sec for section #1 /* func_info for section #1 */
      btf_ext_info_sec for section #2 /* func_info for section #2 */
      ...
@@ -773,7 +778,7 @@ Here, num_info must be greater than 0.
 
 The line_info is organized as below.::
 
-     line_info_rec_size
+     line_info_rec_size              /* __u32 value */
      btf_ext_info_sec for section #1 /* line_info for section #1 */
      btf_ext_info_sec for section #2 /* line_info for section #2 */
      ...
@@ -787,6 +792,20 @@ kernel API, the ``insn_off`` is the instruction offset in the unit of ``struct
 bpf_insn``. For ELF API, the ``insn_off`` is the byte offset from the
 beginning of section (``btf_ext_info_sec->sec_name_off``).
 
+The core_relo is organized as below.::
+
+     core_relo_rec_size              /* __u32 value */
+     btf_ext_info_sec for section #1 /* core_relo for section #1 */
+     btf_ext_info_sec for section #2 /* core_relo for section #2 */
+
+``core_relo_rec_size`` specifies the size of ``bpf_core_relo``
+structure when .BTF.ext is generated. All ``bpf_core_relo`` structures
+within a single ``btf_ext_info_sec`` describe relocations applied to
+section named by ``btf_ext_info_sec->sec_name_off``.
+
+See :ref:`Documentation/bpf/llvm_reloc.rst <btf-co-re-relocations>`
+for more information on CO-RE relocations.
+
 4.3 .BTF_ids section
 --------------------
 
index 1ff177b..aeaeb35 100644 (file)
@@ -29,6 +29,7 @@ that goes into great technical depth about the BPF Architecture.
    bpf_licensing
    test_debug
    clang-notes
+   linux-notes
    other
    redirect
 
index 450e640..44188e2 100644 (file)
@@ -240,3 +240,307 @@ The .BTF/.BTF.ext sections has R_BPF_64_NODYLD32 relocations::
       Offset             Info             Type               Symbol's Value  Symbol's Name
   000000000000002c  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
   0000000000000040  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
+
+.. _btf-co-re-relocations:
+
+=================
+CO-RE Relocations
+=================
+
+From object file point of view CO-RE mechanism is implemented as a set
+of CO-RE specific relocation records. These relocation records are not
+related to ELF relocations and are encoded in .BTF.ext section.
+See :ref:`Documentation/bpf/btf.rst <BTF_Ext_Section>` for more
+information on .BTF.ext structure.
+
+CO-RE relocations are applied to BPF instructions to update immediate
+or offset fields of the instruction at load time with information
+relevant for target kernel.
+
+Field to patch is selected based on the instruction class:
+
+* For BPF_ALU, BPF_ALU64, BPF_LD `immediate` field is patched;
+* For BPF_LDX, BPF_STX, BPF_ST `offset` field is patched;
+* BPF_JMP, BPF_JMP32 instructions **should not** be patched.
+
+Relocation kinds
+================
+
+There are several kinds of CO-RE relocations that could be split into
+three groups:
+
+* Field-based - patch instruction with field related information, e.g.
+  change offset field of the BPF_LDX instruction to reflect offset
+  of a specific structure field in the target kernel.
+
+* Type-based - patch instruction with type related information, e.g.
+  change immediate field of the BPF_ALU move instruction to 0 or 1 to
+  reflect if specific type is present in the target kernel.
+
+* Enum-based - patch instruction with enum related information, e.g.
+  change immediate field of the BPF_LD_IMM64 instruction to reflect
+  value of a specific enum literal in the target kernel.
+
+The complete list of relocation kinds is represented by the following enum:
+
+.. code-block:: c
+
+ enum bpf_core_relo_kind {
+       BPF_CORE_FIELD_BYTE_OFFSET = 0,  /* field byte offset */
+       BPF_CORE_FIELD_BYTE_SIZE   = 1,  /* field size in bytes */
+       BPF_CORE_FIELD_EXISTS      = 2,  /* field existence in target kernel */
+       BPF_CORE_FIELD_SIGNED      = 3,  /* field signedness (0 - unsigned, 1 - signed) */
+       BPF_CORE_FIELD_LSHIFT_U64  = 4,  /* bitfield-specific left bitshift */
+       BPF_CORE_FIELD_RSHIFT_U64  = 5,  /* bitfield-specific right bitshift */
+       BPF_CORE_TYPE_ID_LOCAL     = 6,  /* type ID in local BPF object */
+       BPF_CORE_TYPE_ID_TARGET    = 7,  /* type ID in target kernel */
+       BPF_CORE_TYPE_EXISTS       = 8,  /* type existence in target kernel */
+       BPF_CORE_TYPE_SIZE         = 9,  /* type size in bytes */
+       BPF_CORE_ENUMVAL_EXISTS    = 10, /* enum value existence in target kernel */
+       BPF_CORE_ENUMVAL_VALUE     = 11, /* enum value integer value */
+       BPF_CORE_TYPE_MATCHES      = 12, /* type match in target kernel */
+ };
+
+Notes:
+
+* ``BPF_CORE_FIELD_LSHIFT_U64`` and ``BPF_CORE_FIELD_RSHIFT_U64`` are
+  supposed to be used to read bitfield values using the following
+  algorithm:
+
+  .. code-block:: c
+
+     // To read bitfield ``f`` from ``struct s``
+     is_signed = relo(s->f, BPF_CORE_FIELD_SIGNED)
+     off = relo(s->f, BPF_CORE_FIELD_BYTE_OFFSET)
+     sz  = relo(s->f, BPF_CORE_FIELD_BYTE_SIZE)
+     l   = relo(s->f, BPF_CORE_FIELD_LSHIFT_U64)
+     r   = relo(s->f, BPF_CORE_FIELD_RSHIFT_U64)
+     // define ``v`` as signed or unsigned integer of size ``sz``
+     v = *({s|u}<sz> *)((void *)s + off)
+     v <<= l
+     v >>= r
+
+* The ``BPF_CORE_TYPE_MATCHES`` queries matching relation, defined as
+  follows:
+
+  * for integers: types match if size and signedness match;
+  * for arrays & pointers: target types are recursively matched;
+  * for structs & unions:
+
+    * local members need to exist in target with the same name;
+
+    * for each member we recursively check match unless it is already behind a
+      pointer, in which case we only check matching names and compatible kind;
+
+  * for enums:
+
+    * local variants have to have a match in target by symbolic name (but not
+      numeric value);
+
+    * size has to match (but enum may match enum64 and vice versa);
+
+  * for function pointers:
+
+    * number and position of arguments in local type have to match target;
+    * for each argument and the return value we recursively check match.
+
+CO-RE Relocation Record
+=======================
+
+Relocation record is encoded as the following structure:
+
+.. code-block:: c
+
+ struct bpf_core_relo {
+       __u32 insn_off;
+       __u32 type_id;
+       __u32 access_str_off;
+       enum bpf_core_relo_kind kind;
+ };
+
+* ``insn_off`` - instruction offset (in bytes) within a code section
+  associated with this relocation;
+
+* ``type_id`` - BTF type ID of the "root" (containing) entity of a
+  relocatable type or field;
+
+* ``access_str_off`` - offset into corresponding .BTF string section.
+  String interpretation depends on specific relocation kind:
+
+  * for field-based relocations, string encodes an accessed field using
+    a sequence of field and array indices, separated by colon (:). It's
+    conceptually very close to LLVM's `getelementptr <GEP_>`_ instruction's
+    arguments for identifying offset to a field. For example, consider the
+    following C code:
+
+    .. code-block:: c
+
+       struct sample {
+           int a;
+           int b;
+           struct { int c[10]; };
+       } __attribute__((preserve_access_index));
+       struct sample *s;
+
+    * Access to ``s[0].a`` would be encoded as ``0:0``:
+
+      * ``0``: first element of ``s`` (as if ``s`` is an array);
+      * ``0``: index of field ``a`` in ``struct sample``.
+
+    * Access to ``s->a`` would be encoded as ``0:0`` as well.
+    * Access to ``s->b`` would be encoded as ``0:1``:
+
+      * ``0``: first element of ``s``;
+      * ``1``: index of field ``b`` in ``struct sample``.
+
+    * Access to ``s[1].c[5]`` would be encoded as ``1:2:0:5``:
+
+      * ``1``: second element of ``s``;
+      * ``2``: index of anonymous structure field in ``struct sample``;
+      * ``0``: index of field ``c`` in anonymous structure;
+      * ``5``: access to array element #5.
+
+  * for type-based relocations, string is expected to be just "0";
+
+  * for enum value-based relocations, string contains an index of enum
+    value within its enum type;
+
+* ``kind`` - one of ``enum bpf_core_relo_kind``.
+
+.. _GEP: https://llvm.org/docs/LangRef.html#getelementptr-instruction
+
+.. _btf_co_re_relocation_examples:
+
+CO-RE Relocation Examples
+=========================
+
+For the following C code:
+
+.. code-block:: c
+
+ struct foo {
+   int a;
+   int b;
+   unsigned c:15;
+ } __attribute__((preserve_access_index));
+
+ enum bar { U, V };
+
+With the following BTF definitions:
+
+.. code-block::
+
+ ...
+ [2] STRUCT 'foo' size=12 vlen=3
+        'a' type_id=3 bits_offset=0
+        'b' type_id=3 bits_offset=32
+        'c' type_id=4 bits_offset=64 bitfield_size=15
+ [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [4] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)
+ ...
+ [16] ENUM 'bar' encoding=UNSIGNED size=4 vlen=2
+        'U' val=0
+        'V' val=1
+
+Field offset relocations are generated automatically when
+``__attribute__((preserve_access_index))`` is used, for example:
+
+.. code-block:: c
+
+  void alpha(struct foo *s, volatile unsigned long *g) {
+    *g = s->a;
+    s->a = 1;
+  }
+
+  00 <alpha>:
+    0:  r3 = *(s32 *)(r1 + 0x0)
+           00:  CO-RE <byte_off> [2] struct foo::a (0:0)
+    1:  *(u64 *)(r2 + 0x0) = r3
+    2:  *(u32 *)(r1 + 0x0) = 0x1
+           10:  CO-RE <byte_off> [2] struct foo::a (0:0)
+    3:  exit
+
+
+All relocation kinds could be requested via built-in functions.
+E.g. field-based relocations:
+
+.. code-block:: c
+
+  void bravo(struct foo *s, volatile unsigned long *g) {
+    *g = __builtin_preserve_field_info(s->b, 0 /* field byte offset */);
+    *g = __builtin_preserve_field_info(s->b, 1 /* field byte size */);
+    *g = __builtin_preserve_field_info(s->b, 2 /* field existence */);
+    *g = __builtin_preserve_field_info(s->b, 3 /* field signedness */);
+    *g = __builtin_preserve_field_info(s->c, 4 /* bitfield left shift */);
+    *g = __builtin_preserve_field_info(s->c, 5 /* bitfield right shift */);
+  }
+
+  20 <bravo>:
+     4:     r1 = 0x4
+            20:  CO-RE <byte_off> [2] struct foo::b (0:1)
+     5:     *(u64 *)(r2 + 0x0) = r1
+     6:     r1 = 0x4
+            30:  CO-RE <byte_sz> [2] struct foo::b (0:1)
+     7:     *(u64 *)(r2 + 0x0) = r1
+     8:     r1 = 0x1
+            40:  CO-RE <field_exists> [2] struct foo::b (0:1)
+     9:     *(u64 *)(r2 + 0x0) = r1
+    10:     r1 = 0x1
+            50:  CO-RE <signed> [2] struct foo::b (0:1)
+    11:     *(u64 *)(r2 + 0x0) = r1
+    12:     r1 = 0x31
+            60:  CO-RE <lshift_u64> [2] struct foo::c (0:2)
+    13:     *(u64 *)(r2 + 0x0) = r1
+    14:     r1 = 0x31
+            70:  CO-RE <rshift_u64> [2] struct foo::c (0:2)
+    15:     *(u64 *)(r2 + 0x0) = r1
+    16:     exit
+
+
+Type-based relocations:
+
+.. code-block:: c
+
+  void charlie(struct foo *s, volatile unsigned long *g) {
+    *g = __builtin_preserve_type_info(*s, 0 /* type existence */);
+    *g = __builtin_preserve_type_info(*s, 1 /* type size */);
+    *g = __builtin_preserve_type_info(*s, 2 /* type matches */);
+    *g = __builtin_btf_type_id(*s, 0 /* type id in this object file */);
+    *g = __builtin_btf_type_id(*s, 1 /* type id in target kernel */);
+  }
+
+  88 <charlie>:
+    17:     r1 = 0x1
+            88:  CO-RE <type_exists> [2] struct foo
+    18:     *(u64 *)(r2 + 0x0) = r1
+    19:     r1 = 0xc
+            98:  CO-RE <type_size> [2] struct foo
+    20:     *(u64 *)(r2 + 0x0) = r1
+    21:     r1 = 0x1
+            a8:  CO-RE <type_matches> [2] struct foo
+    22:     *(u64 *)(r2 + 0x0) = r1
+    23:     r1 = 0x2 ll
+            b8:  CO-RE <local_type_id> [2] struct foo
+    25:     *(u64 *)(r2 + 0x0) = r1
+    26:     r1 = 0x2 ll
+            d0:  CO-RE <target_type_id> [2] struct foo
+    28:     *(u64 *)(r2 + 0x0) = r1
+    29:     exit
+
+Enum-based relocations:
+
+.. code-block:: c
+
+  void delta(struct foo *s, volatile unsigned long *g) {
+    *g = __builtin_preserve_enum_value(*(enum bar *)U, 0 /* enum literal existence */);
+    *g = __builtin_preserve_enum_value(*(enum bar *)V, 1 /* enum literal value */);
+  }
+
+  f0 <delta>:
+    30:     r1 = 0x1 ll
+            f0:  CO-RE <enumval_exists> [16] enum bar::U = 0
+    32:     *(u64 *)(r2 + 0x0) = r1
+    33:     r1 = 0x1 ll
+            108:  CO-RE <enumval_value> [16] enum bar::V = 1
+    35:     *(u64 *)(r2 + 0x0) = r1
+    36:     exit
diff --git a/Documentation/bpf/standardization/abi.rst b/Documentation/bpf/standardization/abi.rst
new file mode 100644 (file)
index 0000000..0c2e10e
--- /dev/null
@@ -0,0 +1,25 @@
+.. contents::
+.. sectnum::
+
+===================================================
+BPF ABI Recommended Conventions and Guidelines v1.0
+===================================================
+
+This is version 1.0 of an informational document containing recommended
+conventions and guidelines for producing portable BPF program binaries.
+
+Registers and calling convention
+================================
+
+BPF has 10 general purpose registers and a read-only frame pointer register,
+all of which are 64-bits wide.
+
+The BPF calling convention is defined as:
+
+* R0: return value from function calls, and exit value for BPF programs
+* R1 - R5: arguments for function calls
+* R6 - R9: callee saved registers that function calls will preserve
+* R10: read-only frame pointer to access stack
+
+R0 - R5 are scratch registers and BPF programs need to spill/fill them if
+necessary across calls.
index 09c6ba0..a50c3ba 100644 (file)
@@ -12,7 +12,7 @@ for the working group charter, documents, and more.
    :maxdepth: 1
 
    instruction-set
-   linux-notes
+   abi
 
 .. Links:
 .. _IETF BPF Working Group: https://datatracker.ietf.org/wg/bpf/about/
index 4f73e9d..c5d53a6 100644 (file)
@@ -1,11 +1,11 @@
 .. contents::
 .. sectnum::
 
-========================================
-eBPF Instruction Set Specification, v1.0
-========================================
+=======================================
+BPF Instruction Set Specification, v1.0
+=======================================
 
-This document specifies version 1.0 of the eBPF instruction set.
+This document specifies version 1.0 of the BPF instruction set.
 
 Documentation conventions
 =========================
@@ -97,26 +97,10 @@ Definitions
     A:          10000110
     B: 11111111 10000110
 
-Registers and calling convention
-================================
-
-eBPF has 10 general purpose registers and a read-only frame pointer register,
-all of which are 64-bits wide.
-
-The eBPF calling convention is defined as:
-
-* R0: return value from function calls, and exit value for eBPF programs
-* R1 - R5: arguments for function calls
-* R6 - R9: callee saved registers that function calls will preserve
-* R10: read-only frame pointer to access stack
-
-R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if
-necessary across calls.
-
 Instruction encoding
 ====================
 
-eBPF has two instruction encodings:
+BPF has two instruction encodings:
 
 * the basic instruction encoding, which uses 64 bits to encode an instruction
 * the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
@@ -260,7 +244,7 @@ BPF_END    0xd0   0        byte swap operations (see `Byte swap instructions`_ b
 =========  =====  =======  ==========================================================
 
 Underflow and overflow are allowed during arithmetic operations, meaning
-the 64-bit or 32-bit value will wrap. If eBPF program execution would
+the 64-bit or 32-bit value will wrap. If BPF program execution would
 result in division by zero, the destination register is instead set to zero.
 If execution would result in modulo by zero, for ``BPF_ALU64`` the value of
 the destination register is unchanged whereas for ``BPF_ALU`` the upper
@@ -373,7 +357,7 @@ BPF_JNE   0x5    any  PC += offset if dst != src
 BPF_JSGT  0x6    any  PC += offset if dst > src                    signed
 BPF_JSGE  0x7    any  PC += offset if dst >= src                   signed
 BPF_CALL  0x8    0x0  call helper function by address              see `Helper functions`_
-BPF_CALL  0x8    0x1  call PC += offset                            see `Program-local functions`_
+BPF_CALL  0x8    0x1  call PC += imm                               see `Program-local functions`_
 BPF_CALL  0x8    0x2  call helper function by BTF ID               see `Helper functions`_
 BPF_EXIT  0x9    0x0  return                                       BPF_JMP only
 BPF_JLT   0xa    any  PC += offset if dst < src                    unsigned
@@ -382,7 +366,7 @@ BPF_JSLT  0xc    any  PC += offset if dst < src                    signed
 BPF_JSLE  0xd    any  PC += offset if dst <= src                   signed
 ========  =====  ===  ===========================================  =========================================
 
-The eBPF program needs to store the return value into register R0 before doing a
+The BPF program needs to store the return value into register R0 before doing a
 ``BPF_EXIT``.
 
 Example:
@@ -424,8 +408,8 @@ Program-local functions
 ~~~~~~~~~~~~~~~~~~~~~~~
 Program-local functions are functions exposed by the same BPF program as the
 caller, and are referenced by offset from the call instruction, similar to
-``BPF_JA``.  A ``BPF_EXIT`` within the program-local function will return to
-the caller.
+``BPF_JA``.  The offset is encoded in the imm field of the call instruction.
+A ``BPF_EXIT`` within the program-local function will return to the caller.
 
 Load and store instructions
 ===========================
@@ -502,9 +486,9 @@ Atomic operations
 
 Atomic operations are operations that operate on memory and can not be
 interrupted or corrupted by other access to the same memory region
-by other eBPF programs or means outside of this specification.
+by other BPF programs or means outside of this specification.
 
-All atomic operations supported by eBPF are encoded as store operations
+All atomic operations supported by BPF are encoded as store operations
 that use the ``BPF_ATOMIC`` mode modifier as follows:
 
 * ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
@@ -594,7 +578,7 @@ where
 Maps
 ~~~~
 
-Maps are shared memory regions accessible by eBPF programs on some platforms.
+Maps are shared memory regions accessible by BPF programs on some platforms.
 A map can have various semantics as defined in a separate document, and may or
 may not have a single contiguous memory region, but the 'map_val(map)' is
 currently only defined for maps that do have a single contiguous memory region.
@@ -616,6 +600,6 @@ identified by the given id.
 Legacy BPF Packet access instructions
 -------------------------------------
 
-eBPF previously introduced special instructions for access to packet data that were
+BPF previously introduced special instructions for access to packet data that were
 carried over from classic BPF. However, these instructions are
 deprecated and should no longer be used.
index f4acf9c..382818a 100644 (file)
@@ -41,8 +41,8 @@ Support
 Architectures
 ~~~~~~~~~~~~~
 
-Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
-xtensa, and the tag-based KASAN modes are supported only on arm64.
+Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa,
+and loongarch, and the tag-based KASAN modes are supported only on arm64.
 
 Compilers
 ~~~~~~~~~
index ec79b72..042d4dc 100644 (file)
@@ -269,6 +269,7 @@ examples:
                 port {
                     ov7251_ep: endpoint {
                         data-lanes = <0 1>;
+                        link-frequencies = /bits/ 64 <240000000 319200000>;
                         remote-endpoint = <&csiphy3_ep>;
                     };
                 };
index fdb4212..ab69f41 100644 (file)
@@ -135,9 +135,10 @@ patternProperties:
         minimum: 0x1
         maximum: 0xff
         description: |
-          Dynamic address to be assigned to this device. This property is only
-          valid if the I3C device has a static address (first cell of the reg
-          property != 0).
+          Dynamic address to be assigned to this device. In case static address is
+          present (first cell of the reg property != 0), this address is assigned
+          through SETDASA. If static address is not present, this address is assigned
+          through SETNEWDA after assigning a temporary address via ENTDAA.
 
     required:
       - reg
@@ -163,12 +164,18 @@ examples:
             pagesize = <0x8>;
         };
 
-        /* I3C device with a static I2C address. */
+        /* I3C device with a static I2C address and assigned address. */
         thermal_sensor: sensor@68,39200144004 {
             reg = <0x68 0x392 0x144004>;
             assigned-address = <0xa>;
         };
 
+        /* I3C device with only assigned address. */
+        pressure_sensor: sensor@0,39200124000 {
+            reg = <0x0 0x392 0x124000>;
+            assigned-address = <0xc>;
+        };
+
         /*
          * I3C device without a static I2C address but requiring
          * resources described in the DT.
index 9ddba7f..5b1769c 100644 (file)
@@ -4,14 +4,14 @@
 $id: http://devicetree.org/schemas/input/azoteq,iqs7222.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Azoteq IQS7222A/B/C Capacitive Touch Controller
+title: Azoteq IQS7222A/B/C/D Capacitive Touch Controller
 
 maintainers:
   - Jeff LaBundy <jeff@labundy.com>
 
 description: |
-  The Azoteq IQS7222A, IQS7222B and IQS7222C are multichannel capacitive touch
-  controllers that feature additional sensing capabilities.
+  The Azoteq IQS7222A, IQS7222B, IQS7222C and IQS7222D are multichannel
+  capacitive touch controllers that feature additional sensing capabilities.
 
   Link to datasheets: https://www.azoteq.com/
 
@@ -21,6 +21,7 @@ properties:
       - azoteq,iqs7222a
       - azoteq,iqs7222b
       - azoteq,iqs7222c
+      - azoteq,iqs7222d
 
   reg:
     maxItems: 1
@@ -173,6 +174,152 @@ properties:
     maximum: 3000
     description: Specifies the report rate (in ms) during ultra-low-power mode.
 
+  touchscreen-size-x: true
+  touchscreen-size-y: true
+  touchscreen-inverted-x: true
+  touchscreen-inverted-y: true
+  touchscreen-swapped-x-y: true
+
+  trackpad:
+    type: object
+    description: Represents all channels associated with the trackpad.
+
+    properties:
+      azoteq,channel-select:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 12
+        items:
+          minimum: 0
+          maximum: 13
+        description:
+          Specifies the order of the channels that participate in the trackpad.
+          Specify 255 to omit a given channel for the purpose of mapping a
+          non-rectangular trackpad.
+
+      azoteq,num-rows:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 12
+        description: Specifies the number of rows that comprise the trackpad.
+
+      azoteq,num-cols:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 12
+        description: Specifies the number of columns that comprise the trackpad.
+
+      azoteq,top-speed:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        multipleOf: 4
+        minimum: 0
+        maximum: 1020
+        description:
+          Specifies the speed (in coordinates traveled per conversion) after
+          which coordinate filtering is no longer applied.
+
+      azoteq,bottom-speed:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description:
+          Specifies the speed (in coordinates traveled per conversion) after
+          which coordinate filtering is linearly reduced.
+
+      azoteq,use-prox:
+        type: boolean
+        description:
+          Directs the trackpad to respond to the proximity states of the
+          selected channels instead of their corresponding touch states.
+          Note the trackpad cannot report granular coordinates during a
+          state of proximity.
+
+    patternProperties:
+      "^azoteq,lower-cal-(x|y)$":
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's lower starting points.
+
+      "^azoteq,upper-cal-(x|y)$":
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's upper starting points.
+
+      "^event-(press|tap|(swipe|flick)-(x|y)-(pos|neg))$":
+        type: object
+        $ref: input.yaml#
+        description:
+          Represents a press or gesture event reported by the trackpad. Specify
+          'linux,code' under the press event to report absolute coordinates.
+
+        properties:
+          linux,code: true
+
+          azoteq,gesture-angle-tighten:
+            type: boolean
+            description:
+              Limits the tangent of the gesture angle to 0.5 (axial gestures
+              only). If specified in one direction, the effect is applied in
+              either direction.
+
+          azoteq,gesture-max-ms:
+            multipleOf: 16
+            minimum: 0
+            maximum: 4080
+            description:
+              Specifies the length of time (in ms) within which a tap, swipe
+              or flick gesture must be completed in order to be acknowledged
+              by the device. The number specified for any one swipe or flick
+              gesture applies to all other swipe or flick gestures.
+
+          azoteq,gesture-min-ms:
+            multipleOf: 16
+            minimum: 0
+            maximum: 4080
+            description:
+              Specifies the length of time (in ms) for which a tap gesture must
+              be held in order to be acknowledged by the device.
+
+          azoteq,gesture-dist:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 65535
+            description:
+              Specifies the distance (in coordinates) across which a swipe or
+              flick gesture must travel in order to be acknowledged by the
+              device. The number specified for any one swipe or flick gesture
+              applies to all remaining swipe or flick gestures.
+
+              For tap gestures, this property specifies the distance from the
+              original point of contact across which the contact is permitted
+              to travel before the gesture is rejected by the device.
+
+          azoteq,gpio-select:
+            $ref: /schemas/types.yaml#/definitions/uint32-array
+            minItems: 1
+            maxItems: 3
+            items:
+              minimum: 0
+              maximum: 2
+            description: |
+              Specifies one or more GPIO mapped to the event as follows:
+              0: GPIO0
+              1: GPIO3
+              2: GPIO4
+
+              Note that although multiple events can be mapped to a single
+              GPIO, they must all be of the same type (proximity, touch or
+              trackpad gesture).
+
+        additionalProperties: false
+
+    required:
+      - azoteq,channel-select
+
+    additionalProperties: false
+
 patternProperties:
   "^cycle-[0-9]$":
     type: object
@@ -288,6 +435,10 @@ patternProperties:
           Activates the reference channel in response to proximity events
           instead of touch events.
 
+      azoteq,counts-filt-enable:
+        type: boolean
+        description: Applies counts filtering to the channel.
+
       azoteq,ati-band:
         $ref: /schemas/types.yaml#/definitions/uint32
         enum: [0, 1, 2, 3]
@@ -432,12 +583,12 @@ patternProperties:
             description: |
               Specifies one or more GPIO mapped to the event as follows:
               0: GPIO0
-              1: GPIO3 (IQS7222C only)
-              2: GPIO4 (IQS7222C only)
+              1: GPIO3
+              2: GPIO4
 
               Note that although multiple events can be mapped to a single
               GPIO, they must all be of the same type (proximity, touch or
-              slider gesture).
+              slider/trackpad gesture).
 
           azoteq,thresh:
             $ref: /schemas/types.yaml#/definitions/uint32
@@ -521,16 +672,16 @@ patternProperties:
         minimum: 0
         maximum: 65535
         description:
-          Specifies the speed of movement after which coordinate filtering is
-          no longer applied.
+          Specifies the speed (in coordinates traveled per conversion) after
+          which coordinate filtering is no longer applied.
 
       azoteq,bottom-speed:
         $ref: /schemas/types.yaml#/definitions/uint32
         minimum: 0
         maximum: 255
         description:
-          Specifies the speed of movement after which coordinate filtering is
-          linearly reduced.
+          Specifies the speed (in coordinates traveled per conversion) after
+          which coordinate filtering is linearly reduced.
 
       azoteq,bottom-beta:
         $ref: /schemas/types.yaml#/definitions/uint32
@@ -595,10 +746,10 @@ patternProperties:
             minimum: 0
             maximum: 4080
             description:
-              Specifies the distance across which a swipe or flick gesture must
-              travel in order to be acknowledged by the device. The number spec-
-              ified for any one swipe or flick gesture applies to all remaining
-              swipe or flick gestures.
+              Specifies the distance (in coordinates) across which a swipe or
+              flick gesture must travel in order to be acknowledged by the
+              device. The number specified for any one swipe or flick gesture
+              applies to all remaining swipe or flick gestures.
 
           azoteq,gpio-select:
             $ref: /schemas/types.yaml#/definitions/uint32-array
@@ -610,8 +761,8 @@ patternProperties:
             description: |
               Specifies one or more GPIO mapped to the event as follows:
               0: GPIO0
-              1: GPIO3 (IQS7222C only)
-              2: GPIO4 (IQS7222C only)
+              1: GPIO3
+              2: GPIO4
 
               Note that although multiple events can be mapped to a single
               GPIO, they must all be of the same type (proximity, touch or
@@ -629,8 +780,8 @@ patternProperties:
     description: |
       Represents a GPIO mapped to one or more events as follows:
       gpio-0: GPIO0
-      gpio-1: GPIO3 (IQS7222C only)
-      gpio-2: GPIO4 (IQS7222C only)
+      gpio-1: GPIO3
+      gpio-2: GPIO4
 
     allOf:
       - $ref: ../pinctrl/pincfg-node.yaml#
@@ -641,11 +792,53 @@ patternProperties:
     additionalProperties: false
 
 allOf:
+  - $ref: touchscreen/touchscreen.yaml#
+
   - if:
       properties:
         compatible:
           contains:
-            const: azoteq,iqs7222b
+            enum:
+              - azoteq,iqs7222a
+              - azoteq,iqs7222b
+              - azoteq,iqs7222c
+
+    then:
+      properties:
+        touchscreen-size-x: false
+        touchscreen-size-y: false
+        touchscreen-inverted-x: false
+        touchscreen-inverted-y: false
+        touchscreen-swapped-x-y: false
+
+        trackpad: false
+
+      patternProperties:
+        "^channel-([0-9]|1[0-9])$":
+          properties:
+            azoteq,counts-filt-enable: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - azoteq,iqs7222b
+              - azoteq,iqs7222c
+
+    then:
+      patternProperties:
+        "^channel-([0-9]|1[0-9])$":
+          properties:
+            azoteq,ulp-allow: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - azoteq,iqs7222b
+              - azoteq,iqs7222d
 
     then:
       patternProperties:
@@ -657,13 +850,22 @@ allOf:
           properties:
             azoteq,ref-select: false
 
+        "^slider-[0-1]$": false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: azoteq,iqs7222b
+
+    then:
+      patternProperties:
+        "^channel-([0-9]|1[0-9])$":
           patternProperties:
             "^event-(prox|touch)$":
               properties:
                 azoteq,gpio-select: false
 
-        "^slider-[0-1]$": false
-
         "^gpio-[0-2]$": false
 
   - if:
@@ -704,10 +906,6 @@ allOf:
 
     else:
       patternProperties:
-        "^channel-([0-9]|1[0-9])$":
-          properties:
-            azoteq,ulp-allow: false
-
         "^slider-[0-1]$":
           patternProperties:
             "^event-(press|tap|(swipe|flick)-(pos|neg))$":
diff --git a/Documentation/devicetree/bindings/input/touchscreen/azoteq,iqs7211.yaml b/Documentation/devicetree/bindings/input/touchscreen/azoteq,iqs7211.yaml
new file mode 100644 (file)
index 0000000..8cf371b
--- /dev/null
@@ -0,0 +1,769 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/azoteq,iqs7211.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Azoteq IQS7210A/7211A/E Trackpad/Touchscreen Controller
+
+maintainers:
+  - Jeff LaBundy <jeff@labundy.com>
+
+description: |
+  The Azoteq IQS7210A, IQS7211A and IQS7211E trackpad and touchscreen
+  controllers employ projected-capacitance sensing and can track two contacts.
+
+  Link to datasheets: https://www.azoteq.com/
+
+properties:
+  compatible:
+    enum:
+      - azoteq,iqs7210a
+      - azoteq,iqs7211a
+      - azoteq,iqs7211e
+
+  reg:
+    maxItems: 1
+
+  irq-gpios:
+    maxItems: 1
+    description:
+      Specifies the GPIO connected to the device's active-low RDY output. The
+      pin doubles as the IQS7211E's active-low MCLR input, in which case this
+      GPIO must be configured as open-drain.
+
+  reset-gpios:
+    maxItems: 1
+    description:
+      Specifies the GPIO connected to the device's active-low MCLR input. The
+      device is temporarily held in hardware reset prior to initialization if
+      this property is present.
+
+  azoteq,forced-comms:
+    type: boolean
+    description:
+      Enables forced communication; to be used with host adapters that cannot
+      tolerate clock stretching.
+
+  azoteq,forced-comms-default:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1]
+    description:
+      Indicates if the device's OTP memory enables (1) or disables (0) forced
+      communication by default. Specifying this property can expedite startup
+      time if the default value is known.
+
+      If this property is not specified, communication is not initiated until
+      the device asserts its RDY pin shortly after exiting hardware reset. At
+      that point, forced communication is either enabled or disabled based on
+      the presence or absence of the 'azoteq,forced-comms' property.
+
+  azoteq,rate-active-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during active mode.
+
+  azoteq,rate-touch-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during idle-touch mode.
+
+  azoteq,rate-idle-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during idle mode.
+
+  azoteq,rate-lp1-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during low-power mode 1.
+
+  azoteq,rate-lp2-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during low-power mode 2.
+
+  azoteq,timeout-active-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 65535000
+    description:
+      Specifies the length of time (in ms) to wait for an event before moving
+      from active mode to idle or idle-touch modes.
+
+  azoteq,timeout-touch-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 65535000
+    description:
+      Specifies the length of time (in ms) to wait for an event before moving
+      from idle-touch mode to idle mode.
+
+  azoteq,timeout-idle-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 65535000
+    description:
+      Specifies the length of time (in ms) to wait for an event before moving
+      from idle mode to low-power mode 1.
+
+  azoteq,timeout-lp1-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 65535000
+    description:
+      Specifies the length of time (in ms) to wait for an event before moving
+      from low-power mode 1 to low-power mode 2.
+
+  azoteq,timeout-lp2-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 60000
+    description:
+      Specifies the rate (in ms) at which the trackpad reference values
+      are updated during low-power modes 1 and 2.
+
+  azoteq,timeout-ati-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 60000
+    description:
+      Specifies the delay (in ms) before the automatic tuning implementation
+      (ATI) is retried in the event it fails to complete.
+
+  azoteq,timeout-comms-ms:
+    minimum: 0
+    maximum: 65535
+    description:
+      Specifies the delay (in ms) before a communication window is closed.
+
+  azoteq,timeout-press-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 60000
+    description:
+      Specifies the length of time (in ms) to wait before automatically
+      releasing a press event. Specify zero to allow the press state to
+      persist indefinitely.
+
+  azoteq,fosc-freq:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1]
+    description: |
+      Specifies the device's core clock frequency as follows:
+      0: 14 MHz
+      1: 18 MHz
+
+  azoteq,fosc-trim:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 15
+    description: Specifies the device's core clock frequency trim.
+
+  azoteq,num-contacts:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 2
+    default: 0
+    description: Specifies the number of contacts reported by the device.
+
+  azoteq,contact-split:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 255
+    description: Specifies the contact (finger) split factor.
+
+  azoteq,trim-x:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 255
+    description: Specifies the horizontal trim width.
+
+  azoteq,trim-y:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 255
+    description: Specifies the vertical trim height.
+
+  trackpad:
+    type: object
+    description: Represents all channels associated with the trackpad.
+
+    properties:
+      azoteq,rx-enable:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 8
+        items:
+          minimum: 0
+          maximum: 7
+        description:
+          Specifies the order of the CRx pin(s) associated with the trackpad.
+
+      azoteq,tx-enable:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 12
+        items:
+          minimum: 0
+          maximum: 11
+        description:
+          Specifies the order of the CTx pin(s) associated with the trackpad.
+
+      azoteq,channel-select:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 36
+        items:
+          minimum: 0
+          maximum: 255
+        description: |
+          Specifies the channels mapped to each cycle in the following order:
+          Cycle 0, slot 0
+          Cycle 0, slot 1
+          Cycle 1, slot 0
+          Cycle 1, slot 1
+          ...and so on. Specify 255 to disable a given slot.
+
+      azoteq,ati-frac-div-fine:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the trackpad's ATI fine fractional divider.
+
+      azoteq,ati-frac-mult-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 15
+        description: Specifies the trackpad's ATI coarse fractional multiplier.
+
+      azoteq,ati-frac-div-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the trackpad's ATI coarse fractional divider.
+
+      azoteq,ati-comp-div:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the trackpad's ATI compensation divider.
+
+      azoteq,ati-target:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the trackpad's ATI target.
+
+      azoteq,touch-enter:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's touch entrance factor.
+
+      azoteq,touch-exit:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's touch exit factor.
+
+      azoteq,thresh:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's stationary touch threshold.
+
+      azoteq,conv-period:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's conversion period.
+
+      azoteq,conv-frac:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's conversion frequency fraction.
+
+    patternProperties:
+      "^event-(tap(-double|-triple)?|hold|palm|swipe-(x|y)-(pos|neg)(-hold)?)$":
+        type: object
+        $ref: ../input.yaml#
+        description:
+          Represents a gesture event reported by the trackpad. In the case of
+          axial gestures, the duration or distance specified in one direction
+          applies to both directions along the same axis.
+
+        properties:
+          linux,code: true
+
+          azoteq,gesture-max-ms:
+            minimum: 0
+            maximum: 65535
+            description: Specifies the maximum duration of tap/swipe gestures.
+
+          azoteq,gesture-mid-ms:
+            minimum: 0
+            maximum: 65535
+            description:
+              Specifies the maximum duration between subsequent tap gestures
+              (IQS7211E only).
+
+          azoteq,gesture-min-ms:
+            minimum: 0
+            maximum: 65535
+            description: Specifies the minimum duration of hold gestures.
+
+          azoteq,gesture-dist:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 65535
+            description:
+              Specifies the minimum (swipe) or maximum (tap and hold) distance
+              a finger may travel to be considered a gesture.
+
+          azoteq,gesture-dist-rep:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 65535
+            description:
+              Specifies the minimum distance a finger must travel to elicit a
+              repeated swipe gesture (IQS7211E only).
+
+          azoteq,gesture-angle:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 75
+            description:
+              Specifies the maximum angle (in degrees) a finger may travel to
+              be considered a swipe gesture.
+
+          azoteq,thresh:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 42
+            description: Specifies the palm gesture threshold (IQS7211E only).
+
+        additionalProperties: false
+
+    dependencies:
+      azoteq,rx-enable: ["azoteq,tx-enable"]
+      azoteq,tx-enable: ["azoteq,rx-enable"]
+      azoteq,channel-select: ["azoteq,rx-enable"]
+
+    additionalProperties: false
+
+  alp:
+    type: object
+    $ref: ../input.yaml#
+    description: Represents the alternate low-power channel (ALP).
+
+    properties:
+      azoteq,rx-enable:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 8
+        items:
+          minimum: 0
+          maximum: 7
+        description:
+          Specifies the CRx pin(s) associated with the ALP in no particular
+          order.
+
+      azoteq,tx-enable:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 12
+        items:
+          minimum: 0
+          maximum: 11
+        description:
+          Specifies the CTx pin(s) associated with the ALP in no particular
+          order.
+
+      azoteq,ati-frac-div-fine:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the ALP's ATI fine fractional divider.
+
+      azoteq,ati-frac-mult-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 15
+        description: Specifies the ALP's ATI coarse fractional multiplier.
+
+      azoteq,ati-frac-div-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the ALP's ATI coarse fractional divider.
+
+      azoteq,ati-comp-div:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the ALP's ATI compensation divider.
+
+      azoteq,ati-target:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the ALP's ATI target.
+
+      azoteq,ati-base:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        multipleOf: 8
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's ATI base.
+
+      azoteq,ati-mode:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1]
+        description: |
+          Specifies the ALP's ATI mode as follows:
+          0: Partial
+          1: Full
+
+      azoteq,sense-mode:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1]
+        description: |
+          Specifies the ALP's sensing mode as follows:
+          0: Self capacitive
+          1: Mutual capacitive
+
+      azoteq,debounce-enter:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's debounce entrance factor.
+
+      azoteq,debounce-exit:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's debounce exit factor.
+
+      azoteq,thresh:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the ALP's proximity or touch threshold.
+
+      azoteq,conv-period:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's conversion period.
+
+      azoteq,conv-frac:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's conversion frequency fraction.
+
+      linux,code: true
+
+    additionalProperties: false
+
+  button:
+    type: object
+    description: Represents the inductive or capacitive button.
+
+    properties:
+      azoteq,ati-frac-div-fine:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the button's ATI fine fractional divider.
+
+      azoteq,ati-frac-mult-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 15
+        description: Specifies the button's ATI coarse fractional multiplier.
+
+      azoteq,ati-frac-div-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the button's ATI coarse fractional divider.
+
+      azoteq,ati-comp-div:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the button's ATI compensation divider.
+
+      azoteq,ati-target:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the button's ATI target.
+
+      azoteq,ati-base:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        multipleOf: 8
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's ATI base.
+
+      azoteq,ati-mode:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1]
+        description: |
+          Specifies the button's ATI mode as follows:
+          0: Partial
+          1: Full
+
+      azoteq,sense-mode:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1, 2]
+        description: |
+          Specifies the button's sensing mode as follows:
+          0: Self capacitive
+          1: Mutual capacitive
+          2: Inductive
+
+      azoteq,touch-enter:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's touch entrance factor.
+
+      azoteq,touch-exit:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's touch exit factor.
+
+      azoteq,debounce-enter:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's debounce entrance factor.
+
+      azoteq,debounce-exit:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's debounce exit factor.
+
+      azoteq,thresh:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the button's proximity threshold.
+
+      azoteq,conv-period:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's conversion period.
+
+      azoteq,conv-frac:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's conversion frequency fraction.
+
+    patternProperties:
+      "^event-(prox|touch)$":
+        type: object
+        $ref: ../input.yaml#
+        description:
+          Represents a proximity or touch event reported by the button.
+
+        properties:
+          linux,code: true
+
+        additionalProperties: false
+
+    additionalProperties: false
+
+  wakeup-source: true
+
+  touchscreen-size-x: true
+  touchscreen-size-y: true
+  touchscreen-inverted-x: true
+  touchscreen-inverted-y: true
+  touchscreen-swapped-x-y: true
+
+dependencies:
+  touchscreen-size-x: ["azoteq,num-contacts"]
+  touchscreen-size-y: ["azoteq,num-contacts"]
+  touchscreen-inverted-x: ["azoteq,num-contacts"]
+  touchscreen-inverted-y: ["azoteq,num-contacts"]
+  touchscreen-swapped-x-y: ["azoteq,num-contacts"]
+
+required:
+  - compatible
+  - reg
+  - irq-gpios
+
+additionalProperties: false
+
+allOf:
+  - $ref: touchscreen.yaml#
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: azoteq,iqs7210a
+
+    then:
+      properties:
+        alp:
+          properties:
+            azoteq,rx-enable:
+              maxItems: 4
+              items:
+                minimum: 4
+
+    else:
+      properties:
+        azoteq,timeout-press-ms: false
+
+        alp:
+          properties:
+            azoteq,ati-mode: false
+
+        button: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: azoteq,iqs7211e
+
+    then:
+      properties:
+        reset-gpios: false
+
+        trackpad:
+          properties:
+            azoteq,tx-enable:
+              maxItems: 13
+              items:
+                maximum: 12
+
+        alp:
+          properties:
+            azoteq,tx-enable:
+              maxItems: 13
+              items:
+                maximum: 12
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/input/input.h>
+
+    i2c {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            touch@56 {
+                    compatible = "azoteq,iqs7210a";
+                    reg = <0x56>;
+                    irq-gpios = <&gpio 4 GPIO_ACTIVE_LOW>;
+                    reset-gpios = <&gpio 17 (GPIO_ACTIVE_LOW |
+                                             GPIO_PUSH_PULL)>;
+                    azoteq,num-contacts = <2>;
+
+                    trackpad {
+                            azoteq,rx-enable = <6>, <5>, <4>, <3>, <2>;
+                            azoteq,tx-enable = <1>, <7>, <8>, <9>, <10>;
+                    };
+
+                    button {
+                            azoteq,sense-mode = <2>;
+                            azoteq,touch-enter = <40>;
+                            azoteq,touch-exit = <36>;
+
+                            event-touch {
+                                    linux,code = <KEY_HOME>;
+                            };
+                    };
+
+                    alp {
+                            azoteq,sense-mode = <1>;
+                            linux,code = <KEY_POWER>;
+                    };
+            };
+    };
+
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/input/input.h>
+
+    i2c {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            touch@56 {
+                    compatible = "azoteq,iqs7211e";
+                    reg = <0x56>;
+                    irq-gpios = <&gpio 4 (GPIO_ACTIVE_LOW |
+                                          GPIO_OPEN_DRAIN)>;
+
+                    trackpad {
+                            event-tap {
+                                    linux,code = <KEY_PLAYPAUSE>;
+                            };
+
+                            event-tap-double {
+                                    linux,code = <KEY_SHUFFLE>;
+                            };
+
+                            event-tap-triple {
+                                    linux,code = <KEY_AGAIN>;
+                            };
+
+                            event-hold {
+                                    linux,code = <KEY_STOP>;
+                            };
+
+                            event-palm {
+                                    linux,code = <KEY_EXIT>;
+                            };
+
+                            event-swipe-x-pos {
+                                    linux,code = <KEY_REWIND>;
+                            };
+
+                            event-swipe-x-pos-hold {
+                                    linux,code = <KEY_PREVIOUS>;
+                            };
+
+                            event-swipe-x-neg {
+                                    linux,code = <KEY_FASTFORWARD>;
+                            };
+
+                            event-swipe-x-neg-hold {
+                                    linux,code = <KEY_NEXT>;
+                            };
+
+                            event-swipe-y-pos {
+                                    linux,code = <KEY_VOLUMEUP>;
+                            };
+
+                            event-swipe-y-pos-hold {
+                                    linux,code = <KEY_MUTE>;
+                            };
+
+                            event-swipe-y-neg {
+                                    linux,code = <KEY_VOLUMEDOWN>;
+                            };
+
+                            event-swipe-y-neg-hold {
+                                    linux,code = <KEY_MUTE>;
+                            };
+                    };
+            };
+    };
+
+...
index ef4c841..f2808cb 100644 (file)
@@ -93,6 +93,12 @@ properties:
     minimum: 1
     maximum: 255
 
+  threshold:
+    description: Allows setting the "click"-threshold in the range from 0 to 255.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 255
+
   touchscreen-size-x: true
   touchscreen-size-y: true
   touchscreen-fuzz-x: true
index 007adbc..9dc25d3 100644 (file)
@@ -24,6 +24,8 @@ properties:
     maxItems: 1
   reset-gpios:
     maxItems: 1
+  vdd-supply:
+    description: Power supply regulator for the chip
   touchscreen-size-x: true
   touchscreen-size-y: true
   touchscreen-inverted-x: true
index fdd0289..07f9dd6 100644 (file)
@@ -52,6 +52,11 @@ properties:
   touchscreen-swapped-x-y: true
   touchscreen-max-pressure: true
 
+  linux,keycodes:
+    description: Keycodes for the touch keys
+    minItems: 1
+    maxItems: 15
+
 additionalProperties: false
 
 required:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt b/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt
deleted file mode 100644 (file)
index 977d7ed..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-STMicroelectronics STi System Configuration Controlled IRQs
------------------------------------------------------------
-
-On STi based systems; External, CTI (Core Sight), PMU (Performance Management),
-and PL310 L2 Cache IRQs are controlled using System Configuration registers.
-This driver is used to unmask them prior to use.
-
-Required properties:
-- compatible   : Should be "st,stih407-irq-syscfg"
-- st,syscfg    : Phandle to Cortex-A9 IRQ system config registers
-- st,irq-device        : Array of IRQs to enable - should be 2 in length
-- st,fiq-device        : Array of FIQs to enable - should be 2 in length
-
-Optional properties:
-- st,invert-ext        : External IRQs can be inverted at will.  This property inverts
-                 these IRQs using bitwise logic.  A number of defines have been
-                 provided for convenience:
-                       ST_IRQ_SYSCFG_EXT_1_INV
-                       ST_IRQ_SYSCFG_EXT_2_INV
-                       ST_IRQ_SYSCFG_EXT_3_INV
-Example:
-
-irq-syscfg {
-       compatible    = "st,stih407-irq-syscfg";
-       st,syscfg     = <&syscfg_cpu>;
-       st,irq-device = <ST_IRQ_SYSCFG_PMU_0>,
-                       <ST_IRQ_SYSCFG_PMU_1>;
-       st,fiq-device = <ST_IRQ_SYSCFG_DISABLED>,
-                       <ST_IRQ_SYSCFG_DISABLED>;
-};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stih407-irq-syscfg.yaml b/Documentation/devicetree/bindings/interrupt-controller/st,stih407-irq-syscfg.yaml
new file mode 100644 (file)
index 0000000..2b153d7
--- /dev/null
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/st,stih407-irq-syscfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STi System Configuration Controlled IRQs
+
+maintainers:
+  - Patrice Chotard <patrice.chotard@foss.st.com>
+
+description:
+  On STi based systems; External, CTI (Core Sight), PMU (Performance
+  Management), and PL310 L2 Cache IRQs are controlled using System
+  Configuration registers.  This device is used to unmask them prior to use.
+
+properties:
+  compatible:
+    const: st,stih407-irq-syscfg
+
+  st,syscfg:
+    description: Phandle to Cortex-A9 IRQ system config registers
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  st,irq-device:
+    description: Array of IRQs to enable.
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    items:
+      - description: Enable the IRQ of the channel one.
+      - description: Enable the IRQ of the channel two.
+
+  st,fiq-device:
+    description: Array of FIQs to enable.
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    items:
+      - description: Enable the FIQ of the channel one.
+      - description: Enable the FIQ of the channel two.
+
+  st,invert-ext:
+    description: External IRQs can be inverted at will. This property inverts
+      these three IRQs using bitwise logic, each one being encoded respectively
+      on the first, second and fourth bit.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 1, 2, 3, 4, 5, 6 ]
+
+required:
+  - compatible
+  - st,syscfg
+  - st,irq-device
+  - st,fiq-device
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq-st.h>
+    irq-syscfg {
+        compatible    = "st,stih407-irq-syscfg";
+        st,syscfg     = <&syscfg_cpu>;
+        st,irq-device = <ST_IRQ_SYSCFG_PMU_0>,
+                        <ST_IRQ_SYSCFG_PMU_1>;
+        st,fiq-device = <ST_IRQ_SYSCFG_DISABLED>,
+                        <ST_IRQ_SYSCFG_DISABLED>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/media/i2c/ov5695.txt b/Documentation/devicetree/bindings/media/i2c/ov5695.txt
deleted file mode 100644 (file)
index 640a637..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-* Omnivision OV5695 MIPI CSI-2 sensor
-
-Required Properties:
-- compatible: shall be "ovti,ov5695"
-- clocks: reference to the xvclk input clock
-- clock-names: shall be "xvclk"
-- avdd-supply: Analog voltage supply, 2.8 volts
-- dovdd-supply: Digital I/O voltage supply, 1.8 volts
-- dvdd-supply: Digital core voltage supply, 1.2 volts
-- reset-gpios: Low active reset gpio
-
-The device node shall contain one 'port' child node with an
-'endpoint' subnode for its digital output video port,
-in accordance with the video interface bindings defined in
-Documentation/devicetree/bindings/media/video-interfaces.txt.
-The endpoint optional property 'data-lanes' shall be "<1 2>".
-
-Example:
-&i2c7 {
-       ov5695: camera-sensor@36 {
-               compatible = "ovti,ov5695";
-               reg = <0x36>;
-               pinctrl-names = "default";
-               pinctrl-0 = <&clk_24m_cam>;
-
-               clocks = <&cru SCLK_TESTCLKOUT1>;
-               clock-names = "xvclk";
-
-               avdd-supply = <&pp2800_cam>;
-               dovdd-supply = <&pp1800>;
-               dvdd-supply = <&pp1250_cam>;
-               reset-gpios = <&gpio2 5 GPIO_ACTIVE_LOW>;
-
-               port {
-                       wcam_out: endpoint {
-                               remote-endpoint = <&mipi_in_wcam>;
-                               data-lanes = <1 2>;
-                       };
-               };
-       };
-};
diff --git a/Documentation/devicetree/bindings/media/i2c/ov7251.txt b/Documentation/devicetree/bindings/media/i2c/ov7251.txt
deleted file mode 100644 (file)
index 8281151..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-* Omnivision 1/7.5-Inch B&W VGA CMOS Digital Image Sensor
-
-The Omnivision OV7251 is a 1/7.5-Inch CMOS active pixel digital image sensor
-with an active array size of 640H x 480V. It is programmable through a serial
-I2C interface.
-
-Required Properties:
-- compatible: Value should be "ovti,ov7251".
-- clocks: Reference to the xclk clock.
-- clock-names: Should be "xclk".
-- clock-frequency: Frequency of the xclk clock.
-- enable-gpios: Chip enable GPIO. Polarity is GPIO_ACTIVE_HIGH. This corresponds
-  to the hardware pin XSHUTDOWN which is physically active low.
-- vdddo-supply: Chip digital IO regulator.
-- vdda-supply: Chip analog regulator.
-- vddd-supply: Chip digital core regulator.
-
-The device node shall contain one 'port' child node with a single 'endpoint'
-subnode for its digital output video port, in accordance with the video
-interface bindings defined in
-Documentation/devicetree/bindings/media/video-interfaces.txt.
-
-Example:
-
-       &i2c1 {
-               ...
-
-               ov7251: camera-sensor@60 {
-                       compatible = "ovti,ov7251";
-                       reg = <0x60>;
-
-                       enable-gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>;
-                       pinctrl-names = "default";
-                       pinctrl-0 = <&camera_bw_default>;
-
-                       clocks = <&clks 200>;
-                       clock-names = "xclk";
-                       clock-frequency = <24000000>;
-
-                       vdddo-supply = <&camera_dovdd_1v8>;
-                       vdda-supply = <&camera_avdd_2v8>;
-                       vddd-supply = <&camera_dvdd_1v2>;
-
-                       port {
-                               ov7251_ep: endpoint {
-                                       clock-lanes = <1>;
-                                       data-lanes = <0>;
-                                       remote-endpoint = <&csi0_ep>;
-                               };
-                       };
-               };
-       };
index 359dc08..6829a4a 100644 (file)
@@ -5,26 +5,41 @@
 $id: http://devicetree.org/schemas/media/i2c/ovti,ov5693.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Omnivision OV5693 CMOS Sensor
+title: Omnivision OV5693/OV5695 CMOS Sensors
 
 maintainers:
   - Tommaso Merciai <tommaso.merciai@amarulasolutions.com>
 
 description: |
-  The Omnivision OV5693 is a high performance, 1/4-inch, 5 megapixel, CMOS
-  image sensor that delivers 2592x1944 at 30fps. It provides full-frame,
+  The Omnivision OV5693/OV5695 are high performance, 1/4-inch, 5 megapixel, CMOS
+  image sensors that deliver 2592x1944 at 30fps. They provide full-frame,
   sub-sampled, and windowed 10-bit MIPI images in various formats via the
   Serial Camera Control Bus (SCCB) interface.
 
-  OV5693 is controlled via I2C and two-wire Serial Camera Control Bus (SCCB).
-  The sensor output is available via CSI-2 serial data output (up to 2-lane).
+  OV5693/OV5695 are controlled via I2C and two-wire Serial Camera Control Bus
+  (SCCB). The sensor output is available via CSI-2 serial data output (up to
+  2-lane).
 
 allOf:
   - $ref: /schemas/media/video-interface-devices.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ovti,ov5693
+    then:
+      properties:
+        port:
+          properties:
+            endpoint:
+              required:
+                - link-frequencies
 
 properties:
   compatible:
-    const: ovti,ov5693
+    enum:
+      - ovti,ov5693
+      - ovti,ov5695
 
   reg:
     maxItems: 1
@@ -34,6 +49,9 @@ properties:
       System input clock (aka XVCLK). From 6 to 27 MHz.
     maxItems: 1
 
+  clock-names:
+    const: xvclk
+
   dovdd-supply:
     description:
       Digital I/O voltage supply, 1.8V.
@@ -72,7 +90,6 @@ properties:
 
         required:
           - data-lanes
-          - link-frequencies
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov7251.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov7251.yaml
new file mode 100644 (file)
index 0000000..2e5187a
--- /dev/null
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/i2c/ovti,ov7251.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: OmniVision OV7251 Image Sensor
+
+description:
+  The Omnivision OV7251 is a 1/7.5-Inch CMOS active pixel digital image sensor
+  with an active array size of 640H x 480V. It is programmable through a serial
+  I2C interface.
+
+maintainers:
+  - Todor Tomov <todor.too@gmail.com>
+
+properties:
+  compatible:
+    const: ovti,ov7251
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    description: XCLK Input Clock
+
+  clock-names:
+    const: xclk
+
+  clock-frequency:
+    description: Frequency of the xclk clock in Hz.
+
+  vdda-supply:
+    description: Analog voltage supply, 2.8 volts
+
+  vddd-supply:
+    description: Digital core voltage supply, 1.2 volts
+
+  vdddo-supply:
+    description: Digital I/O voltage supply, 1.8 volts
+
+  enable-gpios:
+    maxItems: 1
+    description:
+      Reference to the GPIO connected to the XSHUTDOWN pin, if any. Polarity
+      is GPIO_ACTIVE_HIGH.
+
+  port:
+    description: Digital Output Port
+    $ref: /schemas/graph.yaml#/$defs/port-base
+    additionalProperties: false
+
+    properties:
+      endpoint:
+        $ref: /schemas/media/video-interfaces.yaml#
+        unevaluatedProperties: false
+
+        properties:
+          clock-lanes:
+            maximum: 1
+
+          data-lanes:
+            maxItems: 1
+
+          link-frequencies: true
+
+        required:
+          - data-lanes
+          - link-frequencies
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - vdddo-supply
+  - vdda-supply
+  - port
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        camera@3c {
+            compatible = "ovti,ov7251";
+            reg = <0x3c>;
+            clocks = <&clks 1>;
+            clock-frequency = <24000000>;
+            vdddo-supply = <&ov7251_vdddo_1v8>;
+            vdda-supply = <&ov7251_vdda_2v8>;
+            vddd-supply = <&ov7251_vddd_1v5>;
+            enable-gpios = <&gpio1 19 GPIO_ACTIVE_HIGH>;
+
+            port {
+                ov7251_ep: endpoint {
+                    remote-endpoint = <&csi0_ep>;
+                    clock-lanes = <1>;
+                    data-lanes = <0>;
+                    link-frequencies = /bits/ 64 <240000000 319200000>;
+                };
+            };
+        };
+    };
+...
index 0bad7e6..e466dff 100644 (file)
@@ -199,6 +199,7 @@ examples:
             wcam: camera@36 {
                 compatible = "ovti,ov5695";
                 reg = <0x36>;
+                clocks = <&cru SCLK_TESTCLKOUT1>;
 
                 port {
                     wcam_out: endpoint {
diff --git a/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt b/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt
deleted file mode 100644 (file)
index c42eecf..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-Broadcom Kona PWM controller device tree bindings
-
-This controller has 6 channels.
-
-Required Properties :
-- compatible: should contain "brcm,kona-pwm"
-- reg: physical base address and length of the controller's registers
-- clocks: phandle + clock specifier pair for the external clock
-- #pwm-cells: Should be 3. See pwm.yaml in this directory for a
-  description of the cells format.
-
-Refer to clocks/clock-bindings.txt for generic clock consumer properties.
-
-Example:
-
-pwm: pwm@3e01a000 {
-       compatible = "brcm,bcm11351-pwm", "brcm,kona-pwm";
-       reg = <0x3e01a000 0xc4>;
-       clocks = <&pwm_clk>;
-       #pwm-cells = <3>;
-};
diff --git a/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.yaml b/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.yaml
new file mode 100644 (file)
index 0000000..e86c805
--- /dev/null
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/brcm,kona-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Kona family PWM controller
+
+description:
+  This controller has 6 channels.
+
+maintainers:
+  - Florian Fainelli <f.fainelli@gmail.com>
+
+allOf:
+  - $ref: pwm.yaml#
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - brcm,bcm11351-pwm
+      - const: brcm,kona-pwm
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  '#pwm-cells':
+    const: 3
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/bcm281xx.h>
+
+    pwm@3e01a000 {
+       compatible = "brcm,bcm11351-pwm", "brcm,kona-pwm";
+       reg = <0x3e01a000 0xcc>;
+       clocks = <&slave_ccu BCM281XX_SLAVE_CCU_PWM>;
+       #pwm-cells = <3>;
+    };
+...
index 4d2bef1..c8bb2ee 100644 (file)
@@ -14,13 +14,17 @@ maintainers:
 
 properties:
   compatible:
-    enum:
-      - atmel,at91rm9200-rtc
-      - atmel,at91sam9x5-rtc
-      - atmel,sama5d4-rtc
-      - atmel,sama5d2-rtc
-      - microchip,sam9x60-rtc
-      - microchip,sama7g5-rtc
+    oneOf:
+      - enum:
+          - atmel,at91rm9200-rtc
+          - atmel,at91sam9x5-rtc
+          - atmel,sama5d4-rtc
+          - atmel,sama5d2-rtc
+          - microchip,sam9x60-rtc
+          - microchip,sama7g5-rtc
+      - items:
+          - const: microchip,sam9x7-rtc
+          - const: microchip,sam9x60-rtc
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/intersil,isl12022.yaml b/Documentation/devicetree/bindings/rtc/intersil,isl12022.yaml
new file mode 100644 (file)
index 0000000..c2d1441
--- /dev/null
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/intersil,isl12022.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intersil ISL12022 Real-time Clock
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+properties:
+  compatible:
+    const: isil,isl12022
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 0
+
+  isil,battery-trip-levels-microvolt:
+    description:
+      The battery voltages at which the first alarm and second alarm
+      should trigger (normally ~85% and ~75% of nominal V_BAT).
+    items:
+      - enum: [2125000, 2295000, 2550000, 2805000, 3060000, 4250000, 4675000]
+      - enum: [1875000, 2025000, 2250000, 2475000, 2700000, 3750000, 4125000]
+
+required:
+  - compatible
+  - reg
+
+allOf:
+  - $ref: rtc.yaml#
+  # If #clock-cells is present, interrupts must not be present
+  - if:
+      required:
+        - '#clock-cells'
+    then:
+      properties:
+        interrupts: false
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        rtc@6f {
+            compatible = "isil,isl12022";
+            reg = <0x6f>;
+            interrupts-extended = <&gpio1 5 IRQ_TYPE_LEVEL_LOW>;
+            isil,battery-trip-levels-microvolt = <2550000>, <2250000>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt b/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt
deleted file mode 100644 (file)
index 85be53a..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-* Maxim DS3231 Real Time Clock
-
-Required properties:
-- compatible: Should contain "maxim,ds3231".
-- reg: I2C address for chip.
-
-Optional property:
-- #clock-cells: Should be 1.
-- clock-output-names:
-  overwrite the default clock names "ds3231_clk_sqw" and "ds3231_clk_32khz".
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. Following indices are allowed:
-    - 0: square-wave output on the SQW pin
-    - 1: square-wave output on the 32kHz pin
-
-- interrupts: rtc alarm/event interrupt. When this property is selected,
-  clock on the SQW pin cannot be used.
-
-Example:
-
-ds3231: ds3231@51 {
-       compatible = "maxim,ds3231";
-       reg = <0x68>;
-       #clock-cells = <1>;
-};
-
-device1 {
-...
-       clocks = <&ds3231 0>;
-...
-};
-
-device2 {
-...
-       clocks = <&ds3231 1>;
-...
-};
index bcb2300..2d9fe5a 100644 (file)
@@ -18,6 +18,7 @@ properties:
       - nxp,pca2129
       - nxp,pcf2127
       - nxp,pcf2129
+      - nxp,pcf2131
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/st,m48t86.yaml b/Documentation/devicetree/bindings/rtc/st,m48t86.yaml
new file mode 100644 (file)
index 0000000..e3e12fa
--- /dev/null
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/st,m48t86.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ST M48T86 / Dallas DS12887 RTC with SRAM
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+allOf:
+  - $ref: rtc.yaml
+
+properties:
+  compatible:
+    enum:
+      - st,m48t86
+
+  reg:
+    items:
+      - description: index register
+      - description: data register
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    rtc@10800000 {
+      compatible = "st,m48t86";
+      reg = <0x10800000 0x1>, <0x11700000 0x1>;
+    };
+
+...
index 9af77f2..2a65f31 100644 (file)
@@ -45,8 +45,6 @@ properties:
       - isil,isl1208
       # Intersil ISL1218 Low Power RTC with Battery Backed SRAM
       - isil,isl1218
-      # Intersil ISL12022 Real-time Clock
-      - isil,isl12022
       # Real Time Clock Module with I2C-Bus
       - microcrystal,rv3029
       # Real Time Clock
index bdde68a..a680d7a 100644 (file)
@@ -14,7 +14,13 @@ properties:
     pattern: "^easrc@.*"
 
   compatible:
-    const: fsl,imx8mn-easrc
+    oneOf:
+      - enum:
+          - fsl,imx8mn-easrc
+      - items:
+          - enum:
+              - fsl,imx8mp-easrc
+          - const: fsl,imx8mn-easrc
 
   reg:
     maxItems: 1
index f5cc7aa..443e2e7 100644 (file)
@@ -17,6 +17,7 @@ properties:
   compatible:
     enum:
       - amlogic,meson-gxbb-wdt
+      - amlogic,t7-wdt
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/watchdog/marvell,cn10624-wdt.yaml b/Documentation/devicetree/bindings/watchdog/marvell,cn10624-wdt.yaml
new file mode 100644 (file)
index 0000000..1b583f2
--- /dev/null
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/marvell,cn10624-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Global Timer (GTI) system watchdog
+
+maintainers:
+  - Bharat Bhushan <bbhushan2@marvell.com>
+
+allOf:
+  - $ref: watchdog.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - marvell,cn9670-wdt
+          - marvell,cn10624-wdt
+
+      - items:
+          - enum:
+              - marvell,cn9880-wdt
+              - marvell,cnf9535-wdt
+          - const: marvell,cn9670-wdt
+
+      - items:
+          - enum:
+              - marvell,cn10308-wdt
+              - marvell,cnf10518-wdt
+          - const: marvell,cn10624-wdt
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: refclk
+
+  marvell,wdt-timer-index:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 63
+    description:
+      An SoC has many timers (up to 64). Firmware can reserve one or more timers
+      for some other use case and configure one of the global timers as the
+      watchdog timer. Firmware will update this field with the number of the
+      timer configured as the watchdog timer.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        watchdog@802000040000 {
+            compatible = "marvell,cn9670-wdt";
+            reg = <0x00008020 0x00040000 0x00000000 0x00020000>;
+            interrupts = <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>;
+            clocks = <&sclk>;
+            clock-names = "refclk";
+            marvell,wdt-timer-index = <63>;
+        };
+    };
+
+...
index 6d0fe6a..5046dfa 100644 (file)
@@ -18,6 +18,7 @@ properties:
       - items:
           - enum:
               - qcom,kpss-wdt-ipq4019
+              - qcom,apss-wdt-ipq5018
               - qcom,apss-wdt-ipq5332
               - qcom,apss-wdt-ipq9574
               - qcom,apss-wdt-msm8994
index fc55321..62ddc28 100644 (file)
@@ -34,6 +34,20 @@ properties:
   power-domains:
     maxItems: 1
 
+  memory-region:
+    maxItems: 1
+    description:
+      Contains the watchdog reserved memory. It is optional.
+      In the reserved memory, the specified values, which are
+      PON_REASON_SOF_NUM(0xBBBBCCCC), PON_REASON_MAGIC_NUM(0xDDDDDDDD),
+      and PON_REASON_EOF_NUM(0xCCCCBBBB), are pre-stored at the first
+      3 * 4 bytes to tell that last boot was caused by watchdog reset.
+      Once the PON reason is captured by driver(rti_wdt.c), the driver
+      is supposed to wipe the whole memory region. If this property
+      is set, at least 12 bytes of reserved memory starting at a
+      specific memory address (0xa2200000 in the example) must be
+      provided. Refer to the example for details.
+
 required:
   - compatible
   - reg
@@ -47,7 +61,18 @@ examples:
     /*
      * RTI WDT in main domain on J721e SoC. Assigned clocks are used to
      * select the source clock for the watchdog, forcing it to tick with
-     * a 32kHz clock in this case.
+     * a 32kHz clock in this case. Add an optional reserved memory region
+     * to keep the watchdog reset cause persistent; it is written to the 12
+     * bytes starting at 0xa2200000 by the RTI watchdog firmware, which makes
+     * it possible for the driver to read the watchdog reset cause.
+     *
+     * Reserved memory should be defined as follows:
+     * reserved-memory {
+     *     wdt_reset_memory_region: wdt-memory@a2200000 {
+     *         reg = <0x00 0xa2200000 0x00 0x1000>;
+     *         no-map;
+     *     };
+     * }
      */
     #include <dt-bindings/soc/ti,sci_pm_domain.h>
 
@@ -58,4 +83,5 @@ examples:
         power-domains = <&k3_pds 252 TI_SCI_PD_EXCLUSIVE>;
         assigned-clocks = <&k3_clks 252 1>;
         assigned-clock-parents = <&k3_clks 252 5>;
+        memory-region = <&wdt_reset_memory_region>;
     };
index 311af51..5da27a7 100644 (file)
@@ -32,22 +32,6 @@ register blocks.
 :c:type:`struct ata_port_operations <ata_port_operations>`
 ----------------------------------------------------------
 
-Disable ATA port
-~~~~~~~~~~~~~~~~
-
-::
-
-    void (*port_disable) (struct ata_port *);
-
-
-Called from :c:func:`ata_bus_probe` error path, as well as when unregistering
-from the SCSI module (rmmod, hot unplug). This function should do
-whatever needs to be done to take the port out of use. In most cases,
-:c:func:`ata_port_disable` can be used as this hook.
-
-Called from :c:func:`ata_bus_probe` on a failed probe. Called from
-:c:func:`ata_scsi_release`.
-
 Post-IDENTIFY device configuration
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -274,14 +258,6 @@ Exception and probe handling (EH)
 
 ::
 
-    void (*eng_timeout) (struct ata_port *ap);
-    void (*phy_reset) (struct ata_port *ap);
-
-
-Deprecated. Use ``->error_handler()`` instead.
-
-::
-
     void (*freeze) (struct ata_port *ap);
     void (*thaw) (struct ata_port *ap);
 
@@ -364,8 +340,7 @@ SATA phy read/write
                        u32 val);
 
 
-Read and write standard SATA phy registers. Currently only used if
-``->phy_reset`` hook called the :c:func:`sata_phy_reset` helper function.
+Read and write standard SATA phy registers.
 sc_reg is one of SCR_STATUS, SCR_CONTROL, SCR_ERROR, or SCR_ACTIVE.
 
 Init and shutdown
@@ -536,13 +511,12 @@ to return without deallocating the qc. This leads us to
 
 :c:func:`ata_scsi_error` is the current ``transportt->eh_strategy_handler()``
 for libata. As discussed above, this will be entered in two cases -
-timeout and ATAPI error completion. This function calls low level libata
-driver's :c:func:`eng_timeout` callback, the standard callback for which is
-:c:func:`ata_eng_timeout`. It checks if a qc is active and calls
-:c:func:`ata_qc_timeout` on the qc if so. Actual error handling occurs in
-:c:func:`ata_qc_timeout`.
+timeout and ATAPI error completion. This function will check if a qc is active
+and has not failed yet. Such a qc will be marked with AC_ERR_TIMEOUT such that
+EH will know to handle it later. Then it calls low level libata driver's
+:c:func:`error_handler` callback.
 
-If EH is invoked for timeout, :c:func:`ata_qc_timeout` stops BMDMA and
+When the :c:func:`error_handler` callback is invoked it stops BMDMA and
 completes the qc. Note that as we're currently in EH, we cannot call
 scsi_done. As described in SCSI EH doc, a recovered scmd should be
 either retried with :c:func:`scsi_queue_insert` or finished with
index bf0124f..c4581c2 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
index ffcc9f2..de84cef 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
index 958498f..5e91ec7 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: |  ok  |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
index 76ce938..085f309 100644 (file)
@@ -57,6 +57,16 @@ a snapshot on any subdirectory (and its nested contents) in the
 system.  Snapshot creation and deletion are as simple as 'mkdir
 .snap/foo' and 'rmdir .snap/foo'.
 
+Snapshot names have two limitations:
+
+* They can not start with an underscore ('_'), as these names are reserved
+  for internal usage by the MDS.
+* They can not exceed 240 characters in size.  This is because the MDS makes
+  use of long snapshot names internally, which follow the format:
+  `_<SNAPSHOT-NAME>_<INODE-NUMBER>`.  Since filenames in general can't have
+  more than 255 characters, and `<INODE-NUMBER>` takes 13 characters, the
+  snapshot name itself can take as many as 255 - 1 - 1 - 13 = 240 characters.
+
 Ceph also provides some recursive accounting on directories for nested
 files and bytes.  That is, a 'getfattr -d foo' on any directory in the
 system will reveal the total number of nested regular files and
index bec25c8..8a58429 100644 (file)
@@ -20,8 +20,7 @@ The gl_holders list contains all the queued lock requests (not
 just the holders) associated with the glock. If there are any
 held locks, then they will be contiguous entries at the head
 of the list. Locks are granted in strictly the order that they
-are queued, except for those marked LM_FLAG_PRIORITY which are
-used only during recovery, and even then only for journal locks.
+are queued.
 
 There are three lock states that users of the glock layer can request,
 namely shared (SH), deferred (DF) and exclusive (EX). Those translate
index 1f96155..2b59cff 100644 (file)
@@ -461,6 +461,7 @@ Memory Area, or VMA) there is a series of lines such as the following::
     Private_Dirty:         0 kB
     Referenced:          892 kB
     Anonymous:             0 kB
+    KSM:                   0 kB
     LazyFree:              0 kB
     AnonHugePages:         0 kB
     ShmemPmdMapped:        0 kB
@@ -501,6 +502,9 @@ accessed.
 a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE
 and a page is modified, the file page is replaced by a private anonymous copy.
 
+"KSM" reports how many of the pages are KSM pages. Note that KSM-placed zeropages
+are not included, only actual KSM pages.
+
 "LazyFree" shows the amount of memory which is marked by madvise(MADV_FREE).
 The memory isn't freed immediately with madvise(). It's freed in memory
 pressure if the memory is clean. Please note that the printed value might
index be131e9..4321c38 100644 (file)
@@ -11,19 +11,19 @@ via sysfs
 product_name
 ------------
 
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
    :doc: product_name
 
 product_number
 --------------
 
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
-   :doc: product_name
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+   :doc: product_number
 
 serial_number
 -------------
 
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
    :doc: serial_number
 
 unique_id
index 3ee89df..c946eb4 100644 (file)
@@ -56,6 +56,15 @@ KCONFIG_OVERWRITECONFIG
 If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
 break symlinks when .config is a symlink to somewhere else.
 
+KCONFIG_WARN_UNKNOWN_SYMBOLS
+----------------------------
+This environment variable makes Kconfig warn about all unrecognized
+symbols in the config input.
+
+KCONFIG_WERROR
+--------------
+If set, Kconfig treats warnings as errors.
+
 `CONFIG_`
 ---------
 If you set `CONFIG_` in the environment, Kconfig will prefix all symbols
@@ -212,6 +221,10 @@ Searching in menuconfig:
        first (and in alphabetical order), then come all other symbols,
        sorted in alphabetical order.
 
+       In this menu, pressing the key in the (#) prefix will jump
+       directly to that location. You will be returned to the current
+       search results after exiting this new menu.
+
 ----------------------------------------------------------------------
 
 User interface options for 'menuconfig'
@@ -264,6 +277,10 @@ Searching in nconfig:
        F8 (SymSearch) searches the configuration symbols for the
        given string or regular expression (regex).
 
+       In the SymSearch, pressing the key in the (#) prefix will
+       jump directly to that location. You will be returned to the
+       current search results after exiting this new menu.
+
 NCONFIG_MODE
 ------------
 This mode shows all sub-menus in one large tree.
index c3851fe..b1d97fa 100644 (file)
@@ -25,50 +25,38 @@ objects <https://www.aosabook.org/en/llvm.html>`_. Clang is a front-end to LLVM
 that supports C and the GNU C extensions required by the kernel, and is
 pronounced "klang," not "see-lang."
 
-Clang
------
-
-The compiler used can be swapped out via ``CC=`` command line argument to ``make``.
-``CC=`` should be set when selecting a config and during a build. ::
-
-       make CC=clang defconfig
-
-       make CC=clang
+Building with LLVM
+------------------
 
-Cross Compiling
----------------
+Invoke ``make`` via::
 
-A single Clang compiler binary will typically contain all supported backends,
-which can help simplify cross compiling. ::
-
-       make ARCH=arm64 CC=clang CROSS_COMPILE=aarch64-linux-gnu-
+       make LLVM=1
 
-``CROSS_COMPILE`` is not used to prefix the Clang compiler binary, instead
-``CROSS_COMPILE`` is used to set a command line flag: ``--target=<triple>``. For
-example: ::
+to compile for the host target. For cross compiling::
 
-       clang --target=aarch64-linux-gnu foo.c
+       make LLVM=1 ARCH=arm64
 
-LLVM Utilities
---------------
+The LLVM= argument
+------------------
 
-LLVM has substitutes for GNU binutils utilities. They can be enabled individually.
-The full list of supported make variables::
+LLVM has substitutes for GNU binutils utilities. They can be enabled
+individually. The full list of supported make variables::
 
        make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \
          OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump READELF=llvm-readelf \
          HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar HOSTLD=ld.lld
 
-To simplify the above command, Kbuild supports the ``LLVM`` variable::
-
-       make LLVM=1
+``LLVM=1`` expands to the above.
 
 If your LLVM tools are not available in your PATH, you can supply their
 location using the LLVM variable with a trailing slash::
 
        make LLVM=/path/to/llvm/
 
-which will use ``/path/to/llvm/clang``, ``/path/to/llvm/ld.lld``, etc.
+which will use ``/path/to/llvm/clang``, ``/path/to/llvm/ld.lld``, etc. The
+following may also be used::
+
+       PATH=/path/to/llvm:$PATH make LLVM=1
 
 If your LLVM tools have a version suffix and you want to test with that
 explicit version rather than the unsuffixed executables like ``LLVM=1``, you
@@ -78,31 +66,72 @@ can pass the suffix using the ``LLVM`` variable::
 
 which will use ``clang-14``, ``ld.lld-14``, etc.
 
+To support combinations of out of tree paths with version suffixes, we
+recommend::
+
+       PATH=/path/to/llvm/:$PATH make LLVM=-14
+
 ``LLVM=0`` is not the same as omitting ``LLVM`` altogether, it will behave like
-``LLVM=1``. If you only wish to use certain LLVM utilities, use their respective
-make variables.
+``LLVM=1``. If you only wish to use certain LLVM utilities, use their
+respective make variables.
+
+The same value used for ``LLVM=`` should be set for each invocation of ``make``
+if configuring and building via distinct commands. ``LLVM=`` should also be set
+as an environment variable when running scripts that will eventually run
+``make``.
 
-The integrated assembler is enabled by default. You can pass ``LLVM_IAS=0`` to
-disable it.
+Cross Compiling
+---------------
 
-Omitting CROSS_COMPILE
+A single Clang compiler binary (and corresponding LLVM utilities) will
+typically contain all supported back ends, which can help simplify cross
+compiling especially when ``LLVM=1`` is used. If you use only LLVM tools,
+``CROSS_COMPILE`` or target-triple-prefixes become unnecessary. Example::
+
+       make LLVM=1 ARCH=arm64
+
+As an example of mixing LLVM and GNU utilities, for a target like ``ARCH=s390``
+which does not yet have ``ld.lld`` or ``llvm-objcopy`` support, you could
+invoke ``make`` via::
+
+       make LLVM=1 ARCH=s390 LD=s390x-linux-gnu-ld.bfd \
+         OBJCOPY=s390x-linux-gnu-objcopy
+
+This example will invoke ``s390x-linux-gnu-ld.bfd`` as the linker and
+``s390x-linux-gnu-objcopy``, so ensure those are reachable in your ``$PATH``.
+
+``CROSS_COMPILE`` is not used to prefix the Clang compiler binary (or
+corresponding LLVM utilities) as is the case for GNU utilities when ``LLVM=1``
+is not set.
+
+The LLVM_IAS= argument
 ----------------------
 
-As explained above, ``CROSS_COMPILE`` is used to set ``--target=<triple>``.
+Clang can assemble assembler code. You can pass ``LLVM_IAS=0`` to disable this
+behavior and have Clang invoke the corresponding non-integrated assembler
+instead. Example::
+
+       make LLVM=1 LLVM_IAS=0
+
+``CROSS_COMPILE`` is necessary when cross compiling and ``LLVM_IAS=0``
+is used in order to set ``--prefix=`` for the compiler to find the
+corresponding non-integrated assembler (typically, you don't want to use the
+system assembler when targeting another architecture). Example::
 
-If ``CROSS_COMPILE`` is not specified, the ``--target=<triple>`` is inferred
-from ``ARCH``.
+       make LLVM=1 ARCH=arm LLVM_IAS=0 CROSS_COMPILE=arm-linux-gnueabi-
 
-That means if you use only LLVM tools, ``CROSS_COMPILE`` becomes unnecessary.
 
-For example, to cross-compile the arm64 kernel::
+Ccache
+------
 
-       make ARCH=arm64 LLVM=1
+``ccache`` can be used with ``clang`` to improve subsequent builds (though
+KBUILD_BUILD_TIMESTAMP_ should be set to a deterministic value between builds
+in order to avoid 100% cache misses, see Reproducible_builds_ for more info)::
 
-If ``LLVM_IAS=0`` is specified, ``CROSS_COMPILE`` is also used to derive
-``--prefix=<path>`` to search for the GNU assembler and linker. ::
+       KBUILD_BUILD_TIMESTAMP='' make LLVM=1 CC="ccache clang"
 
-       make ARCH=arm64 LLVM=1 LLVM_IAS=0 CROSS_COMPILE=aarch64-linux-gnu-
+.. _KBUILD_BUILD_TIMESTAMP: kbuild.html#kbuild-build-timestamp
+.. _Reproducible_builds: reproducible-builds.html#timestamps
 
 Supported Architectures
 -----------------------
@@ -135,14 +164,17 @@ yet. Bug reports are always welcome at the issue tracker below!
    * - hexagon
      - Maintained
      - ``LLVM=1``
+   * - loongarch
+     - Maintained
+     - ``LLVM=1``
    * - mips
      - Maintained
      - ``LLVM=1``
    * - powerpc
      - Maintained
-     - ``CC=clang``
+     - ``LLVM=1``
    * - riscv
-     - Maintained
+     - Supported
      - ``LLVM=1``
    * - s390
      - Maintained
@@ -171,7 +203,11 @@ Getting Help
 Getting LLVM
 -------------
 
-We provide prebuilt stable versions of LLVM on `kernel.org <https://kernel.org/pub/tools/llvm/>`_.
+We provide prebuilt stable versions of LLVM on `kernel.org
+<https://kernel.org/pub/tools/llvm/>`_. These have been optimized with profile
+data for building Linux kernels, which should improve kernel build times
+relative to other distributions of LLVM.
+
 Below are links that may be useful for building LLVM from source or procuring
 it through a distribution's package manager.
 
index c1c732e..09dcf63 100644 (file)
@@ -98,7 +98,7 @@ If you aren't subscribed to netdev and/or are simply unsure if
 repository link above for any new networking-related commits.  You may
 also check the following website for the current status:
 
-  https://patchwork.hopto.org/net-next.html
+  https://netdev.bots.linux.dev/net-next.html
 
 The ``net`` tree continues to collect fixes for the vX.Y content, and is
 fed back to Linus at regular (~weekly) intervals.  Meaning that the
@@ -120,7 +120,37 @@ queue for netdev:
   https://patchwork.kernel.org/project/netdevbpf/list/
 
 The "State" field will tell you exactly where things are at with your
-patch. Patches are indexed by the ``Message-ID`` header of the emails
+patch:
+
+================== =============================================================
+Patch state        Description
+================== =============================================================
+New, Under review  pending review, patch is in the maintainer’s queue for
+                   review; the two states are used interchangeably (depending on
+                   the exact co-maintainer handling patchwork at the time)
+Accepted           patch was applied to the appropriate networking tree, this is
+                   usually set automatically by the pw-bot
+Needs ACK          waiting for an ack from an area expert or testing
+Changes requested  patch has not passed the review, new revision is expected
+                   with appropriate code and commit message changes
+Rejected           patch has been rejected and new revision is not expected
+Not applicable     patch is expected to be applied outside of the networking
+                   subsystem
+Awaiting upstream  patch should be reviewed and handled by appropriate
+                   sub-maintainer, who will send it on to the networking trees;
+                   patches set to ``Awaiting upstream`` in netdev's patchwork
+                   will usually remain in this state, whether the sub-maintainer
+                   requested changes, accepted or rejected the patch
+Deferred           patch needs to be reposted later, usually due to dependency
+                   or because it was posted for a closed tree
+Superseded         new version of the patch was posted, usually set by the
+                   pw-bot
+RFC                not to be applied, usually not in maintainer’s review queue,
+                   pw-bot can automatically set patches to this state based
+                   on subject tags
+================== =============================================================
+
+Patches are indexed by the ``Message-ID`` header of the emails
 which carried them so if you have trouble finding your patch append
 the value of ``Message-ID`` to the URL above.
 
@@ -155,7 +185,7 @@ must match the MAINTAINERS entry) and a handful of senior reviewers.
 
 Bot records its activity here:
 
-  https://patchwork.hopto.org/pw-bot.html
+  https://netdev.bots.linux.dev/pw-bot.html
 
 Review timelines
 ~~~~~~~~~~~~~~~~
index 05ef904..8fdb20c 100644 (file)
@@ -42,7 +42,7 @@ KASAN有三种模式:
 体系架构
 ~~~~~~~~
 
-在x86_64、arm、arm64、powerpc、riscv、s390和xtensa上支持通用KASAN,
+在x86_64、arm、arm64、powerpc、riscv、s390、xtensa和loongarch上支持通用KASAN,
 而基于标签的KASAN模式只在arm64上支持。
 
 编译器
index af94e71..7b1d401 100644 (file)
@@ -528,6 +528,8 @@ families may, however, require a larger buffer. 32kB buffer is recommended
 for most efficient handling of dumps (larger buffer fits more dumped
 objects and therefore fewer recvmsg() calls are needed).
 
+.. _classic_netlink:
+
 Classic Netlink
 ===============
 
index 73db30c..21a7578 100644 (file)
@@ -2259,6 +2259,8 @@ Errors:
   EINVAL   invalid register ID, or no such register or used with VMs in
            protected virtualization mode on s390
   EPERM    (arm64) register access not allowed before vcpu finalization
+  EBUSY    (riscv) changing register value not allowed after the vcpu
+           has run at least once
   ======   ============================================================
 
 (These error codes are indicative only: do not rely on a specific error
@@ -3499,7 +3501,7 @@ VCPU matching underlying host.
 ---------------------
 
 :Capability: basic
-:Architectures: arm64, mips
+:Architectures: arm64, mips, riscv
 :Type: vcpu ioctl
 :Parameters: struct kvm_reg_list (in/out)
 :Returns: 0 on success; -1 on error
index 9c186c2..a62f5a2 100644 (file)
@@ -6118,7 +6118,7 @@ F:        include/video/udlfb.h
 DISTRIBUTED LOCK MANAGER (DLM)
 M:     Christine Caulfield <ccaulfie@redhat.com>
 M:     David Teigland <teigland@redhat.com>
-L:     cluster-devel@redhat.com
+L:     gfs2@lists.linux.dev
 S:     Supported
 W:     http://sources.redhat.com/cluster/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm.git
@@ -8774,7 +8774,7 @@ F:        scripts/get_maintainer.pl
 GFS2 FILE SYSTEM
 M:     Bob Peterson <rpeterso@redhat.com>
 M:     Andreas Gruenbacher <agruenba@redhat.com>
-L:     cluster-devel@redhat.com
+L:     gfs2@lists.linux.dev
 S:     Supported
 B:     https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=gfs2
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
@@ -11382,6 +11382,7 @@ F:      scripts/dummy-tools/
 F:     scripts/mk*
 F:     scripts/mod/
 F:     scripts/package/
+F:     usr/
 
 KERNEL HARDENING (not covered by other areas)
 M:     Kees Cook <keescook@chromium.org>
@@ -11588,6 +11589,8 @@ F:      arch/x86/include/uapi/asm/svm.h
 F:     arch/x86/include/uapi/asm/vmx.h
 F:     arch/x86/kvm/
 F:     arch/x86/kvm/*/
+F:     tools/testing/selftests/kvm/*/x86_64/
+F:     tools/testing/selftests/kvm/x86_64/
 
 KERNFS
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@ -13742,6 +13745,7 @@ F:      include/linux/memory_hotplug.h
 F:     include/linux/mm.h
 F:     include/linux/mmzone.h
 F:     include/linux/pagewalk.h
+F:     include/linux/rmap.h
 F:     include/trace/events/ksm.h
 F:     mm/
 F:     tools/mm/
@@ -18080,7 +18084,6 @@ T:      git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
 F:     Documentation/admin-guide/rtc.rst
 F:     Documentation/devicetree/bindings/rtc/
 F:     drivers/rtc/
-F:     include/linux/platform_data/rtc-*
 F:     include/linux/rtc.h
 F:     include/linux/rtc/
 F:     include/uapi/linux/rtc.h
@@ -21241,7 +21244,7 @@ F:      sound/soc/ti/
 TEXAS INSTRUMENTS AUDIO (ASoC/HDA) DRIVERS
 M:     Shenghao Ding <shenghao-ding@ti.com>
 M:     Kevin Lu <kevin-lu@ti.com>
-M:     Baojun Xu <x1077012@ti.com>
+M:     Baojun Xu <baojun.xu@ti.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/sound/tas2552.txt
index 4f283d9..73f23fa 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -280,8 +280,8 @@ no-dot-config-targets := $(clean-targets) \
 # Installation targets should not require compiler. Unfortunately, vdso_install
 # is an exception where build artifacts may be updated. This must be fixed.
 no-compiler-targets := $(no-dot-config-targets) install dtbs_install \
-                       headers_install modules_install kernelrelease image_name
-no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease \
+                       headers_install modules_install modules_sign kernelrelease image_name
+no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \
                          image_name
 single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.symtypes %/
 
@@ -510,7 +510,6 @@ LEX         = flex
 YACC           = bison
 AWK            = awk
 INSTALLKERNEL  := installkernel
-DEPMOD         = depmod
 PERL           = perl
 PYTHON3                = python3
 CHECK          = sparse
@@ -564,14 +563,6 @@ KBUILD_CFLAGS += -funsigned-char
 KBUILD_CFLAGS += -fno-common
 KBUILD_CFLAGS += -fno-PIE
 KBUILD_CFLAGS += -fno-strict-aliasing
-KBUILD_CFLAGS += -Wall
-KBUILD_CFLAGS += -Wundef
-KBUILD_CFLAGS += -Werror=implicit-function-declaration
-KBUILD_CFLAGS += -Werror=implicit-int
-KBUILD_CFLAGS += -Werror=return-type
-KBUILD_CFLAGS += -Werror=strict-prototypes
-KBUILD_CFLAGS += -Wno-format-security
-KBUILD_CFLAGS += -Wno-trigraphs
 
 KBUILD_CPPFLAGS := -D__KERNEL__
 KBUILD_RUSTFLAGS := $(rust_common_flags) \
@@ -824,10 +815,6 @@ endif # may-sync-config
 endif # need-config
 
 KBUILD_CFLAGS  += -fno-delete-null-pointer-checks
-KBUILD_CFLAGS  += $(call cc-disable-warning,frame-address,)
-KBUILD_CFLAGS  += $(call cc-disable-warning, format-truncation)
-KBUILD_CFLAGS  += $(call cc-disable-warning, format-overflow)
-KBUILD_CFLAGS  += $(call cc-disable-warning, address-of-packed-member)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
 KBUILD_CFLAGS += -O2
@@ -858,40 +845,15 @@ ifdef CONFIG_READABLE_ASM
 KBUILD_CFLAGS += -fno-reorder-blocks -fno-ipa-cp-clone -fno-partial-inlining
 endif
 
-ifneq ($(CONFIG_FRAME_WARN),0)
-KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN)
-endif
-
 stackp-flags-y                                    := -fno-stack-protector
 stackp-flags-$(CONFIG_STACKPROTECTOR)             := -fstack-protector
 stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG)      := -fstack-protector-strong
 
 KBUILD_CFLAGS += $(stackp-flags-y)
 
-KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
-KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
-KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
-
 KBUILD_RUSTFLAGS-$(CONFIG_WERROR) += -Dwarnings
 KBUILD_RUSTFLAGS += $(KBUILD_RUSTFLAGS-y)
 
-ifdef CONFIG_CC_IS_CLANG
-# The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable.
-KBUILD_CFLAGS += -Wno-gnu
-else
-
-# gcc inanely warns about local variables called 'main'
-KBUILD_CFLAGS += -Wno-main
-endif
-
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
-
-# These result in bogus false positives
-KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
-
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
 KBUILD_RUSTFLAGS += -Cforce-frame-pointers=y
@@ -1026,51 +988,12 @@ endif
 # arch Makefile may override CC so keep this after arch Makefile is included
 NOSTDINC_FLAGS += -nostdinc
 
-# Variable Length Arrays (VLAs) should not be used anywhere in the kernel
-KBUILD_CFLAGS += -Wvla
-
-# disable pointer signed / unsigned warnings in gcc 4.0
-KBUILD_CFLAGS += -Wno-pointer-sign
-
-# In order to make sure new function cast mismatches are not introduced
-# in the kernel (to avoid tripping CFI checking), the kernel should be
-# globally built with -Wcast-function-type.
-KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type)
-
 # To gain proper coverage for CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE,
 # the kernel uses only C99 flexible arrays for dynamically sized trailing
 # arrays. Enforce this for everything that may examine structure sizes and
 # perform bounds checking.
 KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3)
 
-# disable stringop warnings in gcc 8+
-KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
-
-# We'll want to enable this eventually, but it's not going away for 5.7 at least
-KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
-
-# Another good warning that we'll want to enable eventually
-KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
-
-# Enabled with W=2, disabled by default as noisy
-ifdef CONFIG_CC_IS_GCC
-KBUILD_CFLAGS += -Wno-maybe-uninitialized
-endif
-
-# The allocators already balk at large sizes, so silence the compiler
-# warnings for bounds checks involving those possible values. While
-# -Wno-alloc-size-larger-than would normally be used here, earlier versions
-# of gcc (<9.1) weirdly don't handle the option correctly when _other_
-# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX
-# doesn't work (as it is documented to), silently resolving to "0" prior to
-# version 9.1 (and producing an error more recently). Numeric values larger
-# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently
-# ignored, continuing to default to PTRDIFF_MAX. So, left with no other
-# choice, we must perform a versioned check to disable this warning.
-# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au
-KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
-KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
-
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS  += -fno-strict-overflow
 
@@ -1082,15 +1005,6 @@ ifdef CONFIG_CC_IS_GCC
 KBUILD_CFLAGS   += -fconserve-stack
 endif
 
-# Prohibit date/time macros, which would make the build non-deterministic
-KBUILD_CFLAGS   += -Werror=date-time
-
-# enforce correct pointer usage
-KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
-
-# Require designated initializers for all marked structures
-KBUILD_CFLAGS   += $(call cc-option,-Werror=designated-init)
-
 # change __FILE__ to the relative path from the srctree
 KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
 
@@ -1548,44 +1462,8 @@ modules: modules_prepare
 modules_prepare: prepare
        $(Q)$(MAKE) $(build)=scripts scripts/module.lds
 
-export modules_sign_only :=
-
-ifeq ($(CONFIG_MODULE_SIG),y)
-PHONY += modules_sign
-modules_sign: modules_install
-       @:
-
-# modules_sign is a subset of modules_install.
-# 'make modules_install modules_sign' is equivalent to 'make modules_install'.
-ifeq ($(filter modules_install,$(MAKECMDGOALS)),)
-modules_sign_only := y
-endif
-endif
-
 endif # CONFIG_MODULES
 
-modinst_pre :=
-ifneq ($(filter modules_install,$(MAKECMDGOALS)),)
-modinst_pre := __modinst_pre
-endif
-
-modules_install: $(modinst_pre)
-PHONY += __modinst_pre
-__modinst_pre:
-       @rm -rf $(MODLIB)/kernel
-       @rm -f $(MODLIB)/source
-       @mkdir -p $(MODLIB)
-ifdef CONFIG_MODULES
-       @ln -s $(abspath $(srctree)) $(MODLIB)/source
-       @if [ ! $(objtree) -ef  $(MODLIB)/build ]; then \
-               rm -f $(MODLIB)/build ; \
-               ln -s $(CURDIR) $(MODLIB)/build ; \
-       fi
-       @sed 's:^\(.*\)\.o$$:kernel/\1.ko:' modules.order > $(MODLIB)/modules.order
-endif
-       @cp -f modules.builtin $(MODLIB)/
-       @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/
-
 ###
 # Cleaning is done on three levels.
 # make clean     Delete most generated files
@@ -1594,7 +1472,7 @@ endif
 # make distclean Remove editor backup files, patch leftover files and the like
 
 # Directories & files removed with 'make clean'
-CLEAN_FILES += include/ksym vmlinux.symvers modules-only.symvers \
+CLEAN_FILES += vmlinux.symvers modules-only.symvers \
               modules.builtin modules.builtin.modinfo modules.nsdeps \
               compile_commands.json .thinlto-cache rust/test rust/doc \
               rust-project.json .vmlinux.objs .vmlinux.export.c
@@ -1608,7 +1486,7 @@ MRPROPER_FILES += include/config include/generated          \
                  certs/signing_key.pem \
                  certs/x509.genkey \
                  vmlinux-gdb.py \
-                 *.spec rpmbuild \
+                 kernel.spec rpmbuild \
                  rust/libmacros.so
 
 # clean - Delete most, but leave enough to build external modules
@@ -1675,7 +1553,6 @@ help:
        @echo  '  mrproper        - Remove all generated files + config + various backup files'
        @echo  '  distclean       - mrproper + remove editor backup and patch files'
        @echo  ''
-       @echo  'Configuration targets:'
        @$(MAKE) -f $(srctree)/scripts/kconfig/Makefile help
        @echo  ''
        @echo  'Other generic targets:'
@@ -1923,19 +1800,39 @@ help:
        @echo  '  rust-analyzer   - generate rust-project.json rust-analyzer support file'
        @echo  ''
 
+ifndef CONFIG_MODULES
+modules modules_install: __external_modules_error
 __external_modules_error:
        @echo >&2 '***'
        @echo >&2 '*** The present kernel disabled CONFIG_MODULES.'
        @echo >&2 '*** You cannot build or install external modules.'
        @echo >&2 '***'
        @false
+endif
 
 endif # KBUILD_EXTMOD
 
 # ---------------------------------------------------------------------------
 # Modules
 
-PHONY += modules modules_install modules_prepare
+PHONY += modules modules_install modules_sign modules_prepare
+
+modules_install:
+       $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst \
+       sign-only=$(if $(filter modules_install,$(MAKECMDGOALS)),,y)
+
+ifeq ($(CONFIG_MODULE_SIG),y)
+# modules_sign is a subset of modules_install.
+# 'make modules_install modules_sign' is equivalent to 'make modules_install'.
+modules_sign: modules_install
+       @:
+else
+modules_sign:
+       @echo >&2 '***'
+       @echo >&2 '*** CONFIG_MODULE_SIG is disabled. You cannot sign modules.'
+       @echo >&2 '***'
+       @false
+endif
 
 ifdef CONFIG_MODULES
 
@@ -1953,22 +1850,9 @@ PHONY += modules_check
 modules_check: $(MODORDER)
        $(Q)$(CONFIG_SHELL) $(srctree)/scripts/modules-check.sh $<
 
-quiet_cmd_depmod = DEPMOD  $(MODLIB)
-      cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
-                   $(KERNELRELEASE)
-
-modules_install:
-       $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
-       $(call cmd,depmod)
-
 else # CONFIG_MODULES
 
-# Modules not configured
-# ---------------------------------------------------------------------------
-
-PHONY += __external_modules_error
-
-modules modules_install: __external_modules_error
+modules:
        @:
 
 KBUILD_MODULES :=
@@ -2147,6 +2031,10 @@ kernelversion:
 image_name:
        @echo $(KBUILD_IMAGE)
 
+PHONY += run-command
+run-command:
+       $(Q)$(KBUILD_RUN_COMMAND)
+
 quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN   $(wildcard $(rm-files)))
       cmd_rmfiles = rm -rf $(rm-files)
 
index dd31e97..396caec 100644 (file)
@@ -3,6 +3,5 @@
 generated-y += syscall_table.h
 generic-y += agp.h
 generic-y += asm-offsets.h
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
index b13c4a2..36b63f2 100644 (file)
@@ -3,8 +3,8 @@
  *     arch/alpha/lib/callback_srm.S
  */
 
+#include <linux/export.h>
 #include <asm/console.h>
-#include <asm/export.h>
 
 .text
 #define HWRPB_CRB_OFFSET 0xc0
index ce02de7..af70ee3 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Zero an entire page.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .global clear_page
index db6c6ca..848eb60 100644 (file)
@@ -10,7 +10,7 @@
  * a successful copy).  There is also some rather minor exception setup
  * stuff.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* Allow an exception for an insn; exit if we get one.  */
 #define EX(x,y...)                     \
index 5439a30..1c444fd 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Copy an entire page.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .global copy_page
index 32ab034..ef18faa 100644 (file)
@@ -12,7 +12,7 @@
  * exception setup stuff..
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* Allow an exception for an insn; exit if we get one.  */
 #define EXI(x,y...)                    \
index c7b213a..273c426 100644 (file)
@@ -13,7 +13,7 @@
  * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .globl csum_ipv6_magic
        .align 4
        .ent csum_ipv6_magic
index 2b60eb4..db01840 100644 (file)
@@ -46,7 +46,7 @@
  *     $28 - compare status
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #define halt .long 0
 
 /*
index 325864c..a534d9f 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Zero an entire page.
  */
-#include <asm/export.h>
+#include <linux/export.h>
         .text
         .align 4
         .global clear_page
index 7e644f8..af776cc 100644 (file)
@@ -29,7 +29,7 @@
  *     want to leave a hole (and we also want to avoid repeating lots of work)
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 /* Allow an exception for an insn; exit if we get one.  */
 #define EX(x,y...)                     \
        99: x,##y;                      \
index fd7212c..36be511 100644 (file)
@@ -57,7 +57,7 @@
    destination pages are in the dcache, but it is my guess that this is
    less important than the dcache miss case.  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .global copy_page
index f3e4337..b9b1971 100644 (file)
@@ -23,7 +23,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 /* Allow an exception for an insn; exit if we get one.  */
 #define EXI(x,y...)                    \
        99: x,##y;                      \
index 9a73f90..2ee548b 100644 (file)
@@ -53,7 +53,7 @@
  * may cause additional delay in rare cases (load-load replay traps).
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .globl csum_ipv6_magic
        .align 4
        .ent csum_ipv6_magic
index 137ff1a..b73a6d2 100644 (file)
@@ -56,7 +56,7 @@
  * Try not to change the actual algorithm if possible for consistency.
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #define halt .long 0
 
 /*
index 56bf9e1..f75ba43 100644 (file)
@@ -28,7 +28,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  * Try not to change the actual algorithm if possible for consistency.
  */
-#include <asm/export.h>
+#include <linux/export.h>
         .set noreorder
         .set noat
 
index ffbd056..3ef43c2 100644 (file)
@@ -20,7 +20,7 @@
  * Temp usage notes:
  *     $1,$2,          - scratch
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noreorder
        .set noat
 
index 1cfcfbb..89d7809 100644 (file)
@@ -27,7 +27,7 @@
  * as fixes will need to be made in multiple places.  The performance gain
  * is worth it.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noat
        .set noreorder
 .text
index ec3096a..f8c7305 100644 (file)
@@ -20,7 +20,7 @@
  * string once.
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
        .align 4
index fbf89e0..97a7cb4 100644 (file)
@@ -16,7 +16,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  * Try not to change the actual algorithm if possible for consistency.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
index b73106f..3d90788 100644 (file)
@@ -18,7 +18,7 @@
  *     U       - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noreorder
        .set noat
 
index ceb0ca5..8f31323 100644 (file)
@@ -21,7 +21,7 @@
  * Try not to change the actual algorithm if possible for consistency.
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
        .align 4
index 7f80e39..ae7355f 100644 (file)
@@ -19,7 +19,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
index c13d3ec..45366e3 100644 (file)
@@ -31,7 +31,7 @@ For correctness consider that:
       - only minimum number of quadwords may be accessed
       - the third argument is an unsigned long
 */
-#include <asm/export.h>
+#include <linux/export.h>
         .set noreorder
         .set noat
 
index 42d1922..3a27689 100644 (file)
@@ -7,7 +7,7 @@
  * This is hand-massaged output from the original memcpy.c.  We defer to
  * memcpy whenever possible; the backwards copy loops are not unrolled.
  */
-#include <asm/export.h>        
+#include <linux/export.h>
        .set noat
        .set noreorder
        .text
index 00393e3..9075d69 100644 (file)
@@ -14,7 +14,7 @@
  * The scheduling comments are according to the EV5 documentation (and done by 
  * hand, so they might well be incorrect, please do tell me about it..)
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noat
        .set noreorder
 .text
index 055877d..62b90eb 100644 (file)
@@ -5,7 +5,7 @@
  *
  * Append a null-terminated string from SRC to DST.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 
        .text
 
index 17871dd..68c54ff 100644 (file)
@@ -6,7 +6,7 @@
  * Return the address of a given character within a null-terminated
  * string, or null if it is not found.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
index cb74ad2..d8773ba 100644 (file)
@@ -6,7 +6,7 @@
  * Copy a null-terminated string from SRC to DST.  Return a pointer
  * to the null-terminator in the source.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
        .align 3
index dd882fe..4fc6a6f 100644 (file)
@@ -12,7 +12,7 @@
  *       do this instead of the 9 instructions that
  *       binary search needs).
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noreorder
        .set noat
 
index 522fee3..a913a7c 100644 (file)
@@ -10,7 +10,7 @@
  * past count, whereas libc may write to count+1.  This follows the generic
  * implementation in lib/string.c and is, IMHO, more sensible.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
        .align 3
index cc57fad..cb90cf0 100644 (file)
@@ -11,7 +11,7 @@
  * version has cropped that bit o' nastiness as well as assuming that
  * __stxncpy is in range of a branch.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noat
        .set noreorder
 
index 7650ba9..dd8e073 100644 (file)
@@ -6,7 +6,7 @@
  * Return the address of the last occurrence of a given character
  * within a null-terminated string, or null if it is not found.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
index b887aa5..96f0591 100644 (file)
@@ -25,7 +25,7 @@
  # along with GCC; see the file COPYING.  If not, write to the 
  # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  # MA 02111-1307, USA.
-#include <asm/export.h>
+#include <linux/export.h>
 
         .set noreorder
         .set noat
index 6f4995a..3162db5 100644 (file)
@@ -27,6 +27,8 @@ config ARC
        select GENERIC_SCHED_CLOCK
        select GENERIC_SMP_IDLE_THREAD
        select GENERIC_IOREMAP
+       select GENERIC_STRNCPY_FROM_USER if MMU
+       select GENERIC_STRNLEN_USER if MMU
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4
@@ -491,11 +493,11 @@ config ARC_KVADDR_SIZE
          kernel-user gutter)
 
 config ARC_CURR_IN_REG
-       bool "Dedicate Register r25 for current_task pointer"
+       bool "cache current task pointer in gp"
        default y
        help
-         This reserved Register R25 to point to Current Task in
-         kernel mode. This saves memory access for each such access
+         This reserves the gp register to point to the current task in
+         kernel mode, eliding a memory access on each use of current
 
 
 config ARC_EMUL_UNALIGNED
index 329400a..2390dd0 100644 (file)
@@ -28,14 +28,14 @@ cflags-y                            += $(tune-mcpu-def-y)
 endif
 endif
 
-
 ifdef CONFIG_ARC_CURR_IN_REG
 # For a global register definition, make sure it gets passed to every file
 # We had a customer reported bug where some code built in kernel was NOT using
-# any kernel headers, and missing the r25 global register
+# any kernel headers, and missing the global register
 # Can't do unconditionally because of recursive include issues
 # due to <linux/thread_info.h>
 LINUXINCLUDE   +=  -include $(srctree)/arch/arc/include/asm/current.h
+cflags-y       += -ffixed-gp
 endif
 
 cflags-y                               += -fsection-anchors
@@ -67,7 +67,7 @@ cflags-$(CONFIG_ARC_DW2_UNWIND)               += -fasynchronous-unwind-tables $(cfi)
 # small data is default for elf32 tool-chain. If not usable, disable it
 # This also allows repurposing GP as scratch reg to gcc reg allocator
 disable_small_data := y
-cflags-$(disable_small_data)           += -mno-sdata -fcall-used-gp
+cflags-$(disable_small_data)           += -mno-sdata
 
 cflags-$(CONFIG_CPU_BIG_ENDIAN)                += -mbig-endian
 ldflags-$(CONFIG_CPU_BIG_ENDIAN)       += -EB
index 2162023..4b13f60 100644 (file)
@@ -23,7 +23,7 @@
 #define ARC_REG_ICCM_BUILD     0x78    /* ICCM size (common) */
 #define ARC_REG_XY_MEM_BCR     0x79
 #define ARC_REG_MAC_BCR                0x7a
-#define ARC_REG_MUL_BCR                0x7b
+#define ARC_REG_MPY_BCR                0x7b
 #define ARC_REG_SWAP_BCR       0x7c
 #define ARC_REG_NORM_BCR       0x7d
 #define ARC_REG_MIXMAX_BCR     0x7e
@@ -177,7 +177,7 @@ struct bcr_isa_arcv2 {
 #endif
 };
 
-struct bcr_uarch_build_arcv2 {
+struct bcr_uarch_build {
 #ifdef CONFIG_CPU_BIG_ENDIAN
        unsigned int pad:8, prod:8, maj:8, min:8;
 #else
@@ -185,6 +185,59 @@ struct bcr_uarch_build_arcv2 {
 #endif
 };
 
+struct bcr_mmu_3 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
+                    u_itlb:4, u_dtlb:4;
+#else
+       unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
+                    ways:4, ver:8;
+#endif
+};
+
+struct bcr_mmu_4 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
+                    n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
+#else
+       /*           DTLB      ITLB      JES        JE         JA      */
+       unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
+                    pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
+#endif
+};
+
+struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+       unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+};
+
+struct bcr_slc_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad:24, way:2, lsz:2, sz:4;
+#else
+       unsigned int sz:4, lsz:2, way:2, pad:24;
+#endif
+};
+
+struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+       unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+};
+
+struct bcr_volatile {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int start:4, limit:4, pad:22, order:1, disable:1;
+#else
+       unsigned int disable:1, order:1, pad:22, limit:4, start:4;
+#endif
+};
+
 struct bcr_mpy {
 #ifdef CONFIG_CPU_BIG_ENDIAN
        unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8;
@@ -302,48 +355,6 @@ struct bcr_generic {
 #endif
 };
 
-/*
- *******************************************************************
- * Generic structures to hold build configuration used at runtime
- */
-
-struct cpuinfo_arc_mmu {
-       unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1;
-       unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8;
-};
-
-struct cpuinfo_arc_cache {
-       unsigned int sz_k:14, line_len:8, assoc:4, alias:1, vipt:1, pad:4;
-};
-
-struct cpuinfo_arc_bpu {
-       unsigned int ver, full, num_cache, num_pred, ret_stk;
-};
-
-struct cpuinfo_arc_ccm {
-       unsigned int base_addr, sz;
-};
-
-struct cpuinfo_arc {
-       struct cpuinfo_arc_cache icache, dcache, slc;
-       struct cpuinfo_arc_mmu mmu;
-       struct cpuinfo_arc_bpu bpu;
-       struct bcr_identity core;
-       struct bcr_isa_arcv2 isa;
-       const char *release, *name;
-       unsigned int vec_base;
-       struct cpuinfo_arc_ccm iccm, dccm;
-       struct {
-               unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
-                            fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
-                            ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1,
-                            timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
-       } extn;
-       struct bcr_mpy extn_mpy;
-};
-
-extern struct cpuinfo_arc cpuinfo_arc700[];
-
 static inline int is_isa_arcv2(void)
 {
        return IS_ENABLED(CONFIG_ISA_ARCV2);
index 1b0ffae..5258cb8 100644 (file)
@@ -18,7 +18,7 @@ static inline void arch_atomic_##op(int i, atomic_t *v)                       \
        : [val] "=&r"   (val) /* Early clobber to prevent reg reuse */  \
        : [ctr] "r"     (&v->counter), /* Not "m": llock only supports reg direct addr mode */  \
          [i]   "ir"    (i)                                             \
-       : "cc");                                                        \
+       : "cc", "memory");                                              \
 }                                                                      \
 
 #define ATOMIC_OP_RETURN(op, asm_op)                           \
@@ -34,7 +34,7 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)       \
        : [val] "=&r"   (val)                                           \
        : [ctr] "r"     (&v->counter),                                  \
          [i]   "ir"    (i)                                             \
-       : "cc");                                                        \
+       : "cc", "memory");                                              \
                                                                        \
        return val;                                                     \
 }
@@ -56,7 +56,7 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)        \
          [orig] "=&r" (orig)                                           \
        : [ctr] "r"     (&v->counter),                                  \
          [i]   "ir"    (i)                                             \
-       : "cc");                                                        \
+       : "cc", "memory");                                              \
                                                                        \
        return orig;                                                    \
 }
index 6b6db98..9b5791b 100644 (file)
@@ -60,7 +60,7 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v)           \
        "       bnz     1b              \n"                             \
        : "=&r"(val)                                                    \
        : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");                                                        \
+       : "cc", "memory");                                              \
 }                                                                      \
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)                               \
@@ -77,7 +77,7 @@ static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)   \
        "       bnz     1b              \n"                             \
        : [val] "=&r"(val)                                              \
        : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");        /* memory clobber comes from smp_mb() */        \
+       : "cc", "memory");                                              \
                                                                        \
        return val;                                                     \
 }
@@ -99,7 +99,7 @@ static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)    \
        "       bnz     1b              \n"                             \
        : "=&r"(orig), "=&r"(val)                                       \
        : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");        /* memory clobber comes from smp_mb() */        \
+       : "cc", "memory");                                              \
                                                                        \
        return orig;                                                    \
 }
index 9b9bdd3..06be89f 100644 (file)
@@ -13,7 +13,7 @@
 
 #ifdef CONFIG_ARC_CURR_IN_REG
 
-register struct task_struct *curr_arc asm("r25");
+register struct task_struct *curr_arc asm("gp");
 #define current (curr_arc)
 
 #else
index 5f4de05..a0d5ebe 100644 (file)
 
 #ifdef ARC_DW2_UNWIND_AS_CFI
 
-#define CFI_STARTPROC  .cfi_startproc
-#define CFI_ENDPROC    .cfi_endproc
-#define CFI_DEF_CFA    .cfi_def_cfa
-#define CFI_REGISTER   .cfi_register
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_UNDEFINED  .cfi_undefined
+#define CFI_STARTPROC          .cfi_startproc
+#define CFI_ENDPROC            .cfi_endproc
+#define CFI_DEF_CFA            .cfi_def_cfa
+#define CFI_DEF_CFA_OFFSET     .cfi_def_cfa_offset
+#define CFI_DEF_CFA_REGISTER   .cfi_def_cfa_register
+#define CFI_OFFSET             .cfi_offset
+#define CFI_REL_OFFSET         .cfi_rel_offset
+#define CFI_REGISTER           .cfi_register
+#define CFI_RESTORE            .cfi_restore
+#define CFI_UNDEFINED          .cfi_undefined
 
 #else
 
 #define CFI_IGNORE     #
 
-#define CFI_STARTPROC  CFI_IGNORE
-#define CFI_ENDPROC    CFI_IGNORE
-#define CFI_DEF_CFA    CFI_IGNORE
-#define CFI_REGISTER   CFI_IGNORE
-#define CFI_REL_OFFSET CFI_IGNORE
-#define CFI_UNDEFINED  CFI_IGNORE
+#define CFI_STARTPROC          CFI_IGNORE
+#define CFI_ENDPROC            CFI_IGNORE
+#define CFI_DEF_CFA            CFI_IGNORE
+#define CFI_DEF_CFA_OFFSET     CFI_IGNORE
+#define CFI_DEF_CFA_REGISTER   CFI_IGNORE
+#define CFI_OFFSET             CFI_IGNORE
+#define CFI_REL_OFFSET         CFI_IGNORE
+#define CFI_REGISTER           CFI_IGNORE
+#define CFI_RESTORE            CFI_IGNORE
+#define CFI_UNDEFINED          CFI_IGNORE
 
 #endif /* !ARC_DW2_UNWIND_AS_CFI */
 
index 0ff4c06..4d13320 100644 (file)
@@ -18,7 +18,6 @@
  *              |      orig_r0      |
  *              |      event/ECR    |
  *              |      bta          |
- *              |      user_r25     |
  *              |      gp           |
  *              |      fp           |
  *              |      sp           |
 /*------------------------------------------------------------------------*/
 .macro INTERRUPT_PROLOGUE
 
-       ; (A) Before jumping to Interrupt Vector, hardware micro-ops did following:
+       ; Before jumping to Interrupt Vector, hardware micro-ops did following:
        ;   1. SP auto-switched to kernel mode stack
        ;   2. STATUS32.Z flag set if in U mode at time of interrupt (U:1,K:0)
        ;   3. Auto save: (mandatory) Push PC and STAT32 on stack
        ;                 hardware does even if CONFIG_ARC_IRQ_NO_AUTOSAVE
-       ;   4. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
+       ;  4a. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
        ;
-       ; (B) Manually saved some regs: r12,r25,r30, sp,fp,gp, ACCL pair
+       ; Now
+       ;  4b. If Auto-save (optional) not enabled in hw, manually save them
+       ;   5. Manually save: r12,r30, sp,fp,gp, ACCL pair
+       ;
+       ; At the end, SP points to pt_regs
 
 #ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE
        ; carve pt_regs on stack (case #3), PC/STAT32 already on stack
 .endm
 
 /*------------------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
 
-       ; (A) Before jumping to Exception Vector, hardware micro-ops did following:
+       ; Before jumping to Exception Vector, hardware micro-ops did following:
        ;   1. SP auto-switched to kernel mode stack
        ;   2. STATUS32.Z flag set if in U mode at time of exception (U:1,K:0)
        ;
-       ; (B) Manually save the complete reg file below
+       ; Now manually save rest of reg file
+       ; At the end, SP points to pt_regs
 
-       sub     sp, sp, SZ_PT_REGS      ; carve pt_regs
+       sub     sp, sp, SZ_PT_REGS      ; carve space for pt_regs
 
        ; _HARD saves r10 clobbered by _SOFT as scratch hence comes first
 
        ; OUTPUT: r10 has ECR expected by EV_Trap
 .endm
 
+.macro EXCEPTION_PROLOGUE
+
+       EXCEPTION_PROLOGUE_KEEP_AE      ; return ECR in r10
+
+       lr  r0, [efa]
+       mov r1, sp
+
+       FAKE_RET_FROM_EXCPN             ; clobbers r9
+.endm
+
 /*------------------------------------------------------------------------
  * This macro saves the registers manually which would normally be autosaved
  * by hardware on taken interrupts. It is used by
  */
 .macro __SAVE_REGFILE_SOFT
 
-       ST2     gp, fp, PT_r26          ; gp (r26), fp (r27)
-
-       st      r12, [sp, PT_sp + 4]
-       st      r30, [sp, PT_sp + 8]
+       st      fp,  [sp, PT_fp]        ; r27
+       st      r30, [sp, PT_r30]
+       st      r12, [sp, PT_r12]
+       st      r26, [sp, PT_r26]       ; gp
 
        ; Saving pt_regs->sp correctly requires some extra work due to the way
        ; Auto stack switch works
 
        ; ISA requires ADD.nz to have same dest and src reg operands
        mov.nz  r10, sp
-       add.nz  r10, r10, SZ_PT_REGS    ; K mode SP
+       add2.nz r10, r10, SZ_PT_REGS/4  ; K mode SP
 
        st      r10, [sp, PT_sp]        ; SP (pt_regs->sp)
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       st      r25, [sp, PT_user_r25]
-       GET_CURR_TASK_ON_CPU    r25
-#endif
-
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
        ST2     r58, r59, PT_r58
 #endif
 
        /* clobbers r10, r11 registers pair */
        DSP_SAVE_REGFILE_IRQ
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+       GET_CURR_TASK_ON_CPU    gp
+#endif
+
 .endm
 
 /*------------------------------------------------------------------------*/
 .macro __RESTORE_REGFILE_SOFT
 
-       LD2     gp, fp, PT_r26          ; gp (r26), fp (r27)
-
-       ld      r12, [sp, PT_r12]
+       ld      fp,  [sp, PT_fp]
        ld      r30, [sp, PT_r30]
+       ld      r12, [sp, PT_r12]
+       ld      r26, [sp, PT_r26]
 
        ; Restore SP (into AUX_USER_SP) only if returning to U mode
        ;  - for K mode, it will be implicitly restored as stack is unwound
        sr      r10, [AUX_USER_SP]
 1:
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       ld      r25, [sp, PT_user_r25]
-#endif
-
        /* clobbers r10, r11 registers pair */
        DSP_RESTORE_REGFILE_IRQ
 
 
        btst    r0, STATUS_U_BIT        ; Z flag set if K, used in restoring SP
 
-       ld      r10, [sp, PT_event + 4]
+       ld      r10, [sp, PT_bta]
        sr      r10, [erbta]
 
        LD2     r10, r11, PT_ret
 
 .macro FAKE_RET_FROM_EXCPN
        lr      r9, [status32]
-       bic     r9, r9, STATUS_AE_MASK
-       or      r9, r9, STATUS_IE_MASK
+       bclr    r9, r9, STATUS_AE_BIT
+       bset    r9, r9, STATUS_IE_BIT
        kflag   r9
 .endm
 
index 67ff06e..a0e760e 100644 (file)
  *
  * After this it is safe to call the "C" handlers
  *-------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
 
        /* Need at least 1 reg to code the early exception prologue */
        PROLOG_FREEUP_REG r9, @ex_saved_reg1
        /* ARC700 doesn't provide auto-stack switching */
        SWITCH_TO_KERNEL_STK
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       /* Treat r25 as scratch reg (save on stack) and load with "current" */
-       PUSH    r25
-       GET_CURR_TASK_ON_CPU   r25
-#else
-       sub     sp, sp, 4
-#endif
-
        st.a    r0, [sp, -8]    /* orig_r0 needed for syscall (skip ECR slot) */
        sub     sp, sp, 4       /* skip pt_regs->sp, already saved above */
 
        PUSHAX  erbta
 
        lr      r10, [ecr]
-       st      r10, [sp, PT_event]    /* EV_Trap expects r10 to have ECR */
+       st      r10, [sp, PT_event]
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+       /* gp already saved on stack: now load with "current" */
+       GET_CURR_TASK_ON_CPU   gp
+#endif
+       ; OUTPUT: r10 has ECR expected by EV_Trap
+.endm
+
+.macro EXCEPTION_PROLOGUE
+
+       EXCEPTION_PROLOGUE_KEEP_AE      ; return ECR in r10
+
+       lr  r0, [efa]
+       mov r1, sp
+
+       FAKE_RET_FROM_EXCPN             ; clobbers r9
 .endm
 
 /*--------------------------------------------------------------
        POP     gp
        RESTORE_R12_TO_R0
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       ld      r25, [sp, 12]
-#endif
        ld  sp, [sp] /* restore original sp */
-       /* orig_r0, ECR, user_r25 skipped automatically */
+       /* orig_r0, ECR skipped automatically */
 .endm
 
 /* Dummy ECR values for Interrupts */
 
        SWITCH_TO_KERNEL_STK
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       /* Treat r25 as scratch reg (save on stack) and load with "current" */
-       PUSH    r25
-       GET_CURR_TASK_ON_CPU   r25
-#else
-       sub     sp, sp, 4
-#endif
 
        PUSH    0x003\LVL\()abcd    /* Dummy ECR */
        sub     sp, sp, 8           /* skip orig_r0 (not needed)
        PUSHAX  lp_start
        PUSHAX  bta_l\LVL\()
 
+#ifdef CONFIG_ARC_CURR_IN_REG
+       /* gp already saved on stack: now load with "current" */
+       GET_CURR_TASK_ON_CPU   gp
+#endif
 .endm
 
 /*--------------------------------------------------------------
        POP     gp
        RESTORE_R12_TO_R0
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       ld      r25, [sp, 12]
-#endif
-       ld  sp, [sp] /* restore original sp */
-       /* orig_r0, ECR, user_r25 skipped automatically */
+       ld  sp, [sp] /* restore original sp; orig_r0, ECR skipped implicitly */
 .endm
 
 /* Get thread_info of "current" tsk */
index fcdd59d..49c2e09 100644 (file)
@@ -13,6 +13,8 @@
 #include <asm/processor.h>     /* For VMALLOC_START */
 #include <asm/mmu.h>
 
+#ifdef __ASSEMBLY__
+
 #ifdef CONFIG_ISA_ARCOMPACT
 #include <asm/entry-compact.h> /* ISA specific bits */
 #else
@@ -89,7 +91,7 @@
  * Helpers to save/restore callee-saved regs:
  * used by several macros below
  *-------------------------------------------------------------*/
-.macro SAVE_R13_TO_R24
+.macro SAVE_R13_TO_R25
        PUSH    r13
        PUSH    r14
        PUSH    r15
        PUSH    r22
        PUSH    r23
        PUSH    r24
+       PUSH    r25
 .endm
 
-.macro RESTORE_R24_TO_R13
+.macro RESTORE_R25_TO_R13
+       POP     r25
        POP     r24
        POP     r23
        POP     r22
        POP     r13
 .endm
 
-/*--------------------------------------------------------------
- * Collect User Mode callee regs as struct callee_regs - needed by
- * fork/do_signal/unaligned-access-emulation.
- * (By default only scratch regs are saved on entry to kernel)
- *
- * Special handling for r25 if used for caching Task Pointer.
- * It would have been saved in task->thread.user_r25 already, but to keep
- * the interface same it is copied into regular r25 placeholder in
- * struct callee_regs.
- *-------------------------------------------------------------*/
+/*
+ * save user mode callee regs as struct callee_regs
+ *  - needed by fork/do_signal/unaligned-access-emulation.
+ */
 .macro SAVE_CALLEE_SAVED_USER
+       SAVE_R13_TO_R25
+.endm
 
-       mov     r12, sp         ; save SP as ref to pt_regs
-       SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-       ; Retrieve orig r25 and save it with rest of callee_regs
-       ld      r12, [r12, PT_user_r25]
-       PUSH    r12
-#else
-       PUSH    r25
-#endif
-
+/*
+ * restore user mode callee regs as struct callee_regs
+ *  - could have been changed by ptrace tracer or unaligned-access fixup
+ */
+.macro RESTORE_CALLEE_SAVED_USER
+       RESTORE_R25_TO_R13
 .endm
 
-/*--------------------------------------------------------------
- * Save kernel Mode callee regs at the time of Contect Switch.
- *
- * Special handling for r25 if used for caching Task Pointer.
- * Kernel simply skips saving it since it will be loaded with
- * incoming task pointer anyways
- *-------------------------------------------------------------*/
+/*
+ * save/restore kernel mode callee regs at the time of context switch
+ */
 .macro SAVE_CALLEE_SAVED_KERNEL
-
-       SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-       sub     sp, sp, 4
-#else
-       PUSH    r25
-#endif
+       SAVE_R13_TO_R25
 .endm
 
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_KERNEL
- *-------------------------------------------------------------*/
 .macro RESTORE_CALLEE_SAVED_KERNEL
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-       add     sp, sp, 4  /* skip usual r25 placeholder */
-#else
-       POP     r25
-#endif
-       RESTORE_R24_TO_R13
-.endm
-
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_USER
- *
- * ptrace tracer or unaligned-access fixup might have changed a user mode
- * callee reg which is saved back to usual r25 storage location
- *-------------------------------------------------------------*/
-.macro RESTORE_CALLEE_SAVED_USER
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-       POP     r12
-#else
-       POP     r25
-#endif
-       RESTORE_R24_TO_R13
-
-       ; SP is back to start of pt_regs
-#ifdef CONFIG_ARC_CURR_IN_REG
-       st      r12, [sp, PT_user_r25]
-#endif
+       RESTORE_R25_TO_R13
 .endm
 
 /*--------------------------------------------------------------
 
 #ifdef CONFIG_SMP
 
-/*-------------------------------------------------
+/*
  * Retrieve the current running task on this CPU
- * 1. Determine curr CPU id.
- * 2. Use it to index into _current_task[ ]
+ *  - loads it from backing _current_task[] (and can't use the
+ *    caching reg for current task)
  */
 .macro  GET_CURR_TASK_ON_CPU   reg
        GET_CPU_ID  \reg
        add2 \tmp, @_current_task, \tmp
        st   \tsk, [\tmp]
 #ifdef CONFIG_ARC_CURR_IN_REG
-       mov r25, \tsk
+       mov gp, \tsk
 #endif
 
 .endm
 .macro  SET_CURR_TASK_ON_CPU    tsk, tmp
        st  \tsk, [@_current_task]
 #ifdef CONFIG_ARC_CURR_IN_REG
-       mov r25, \tsk
+       mov gp, \tsk
 #endif
 .endm
 
 #endif /* SMP / UNI */
 
-/* ------------------------------------------------------------------
+/*
  * Get the ptr to some field of Current Task at @off in task struct
- *  -Uses r25 for Current task ptr if that is enabled
+ *  - Uses current task cached in reg if enabled
  */
-
 #ifdef CONFIG_ARC_CURR_IN_REG
 
 .macro GET_CURR_TASK_FIELD_PTR  off,  reg
-       add \reg, r25, \off
+       add \reg, gp, \off
 .endm
 
 #else
 
 #endif /* CONFIG_ARC_CURR_IN_REG */
 
+#else  /* !__ASSEMBLY__ */
+
+extern void do_signal(struct pt_regs *);
+extern void do_notify_resume(struct pt_regs *);
+extern int do_privilege_fault(unsigned long, struct pt_regs *);
+extern int do_extension_fault(unsigned long, struct pt_regs *);
+extern int insterror_is_error(unsigned long, struct pt_regs *);
+extern int do_memory_error(unsigned long, struct pt_regs *);
+extern int trap_is_brkpt(unsigned long, struct pt_regs *);
+extern int do_misaligned_error(unsigned long, struct pt_regs *);
+extern int do_trap5_error(unsigned long, struct pt_regs *);
+extern int do_misaligned_access(unsigned long, struct pt_regs *, struct callee_regs *);
+extern void do_machine_check_fault(unsigned long, struct pt_regs *);
+extern void do_non_swi_trap(unsigned long, struct pt_regs *);
+extern void do_insterror_or_kprobe(unsigned long, struct pt_regs *);
+extern void do_page_fault(unsigned long, struct pt_regs *);
+
+#endif
+
 #endif  /* __ASM_ARC_ENTRY_H */
index 0309cb4..c574712 100644 (file)
@@ -25,5 +25,6 @@
 #include <asm-generic/irq.h>
 
 extern void arc_init_IRQ(void);
+extern void arch_do_IRQ(unsigned int, struct pt_regs *);
 
 #endif
index ca427c3..9febf5b 100644 (file)
@@ -14,6 +14,8 @@ typedef struct {
        unsigned long asid[NR_CPUS];    /* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
+extern void do_tlb_overlap_fault(unsigned long, unsigned long, struct pt_regs *);
+
 #endif
 
 #include <asm/mmu-arcv2.h>
index fb844fc..d606658 100644 (file)
@@ -22,7 +22,6 @@
  * struct thread_info
  */
 struct thread_struct {
-       unsigned long ksp;      /* kernel mode stack pointer */
        unsigned long callee_reg;       /* pointer to callee regs */
        unsigned long fault_address;    /* dbls as brkpt holder as well */
 #ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
@@ -33,9 +32,7 @@ struct thread_struct {
 #endif
 };
 
-#define INIT_THREAD  {                          \
-       .ksp = sizeof(init_stack) + (unsigned long) init_stack, \
-}
+#define INIT_THREAD  { }
 
 /* Forward declaration, a strange C thing */
 struct task_struct;
@@ -56,7 +53,7 @@ struct task_struct;
  * Where about of Task's sp, fp, blink when it was last seen in kernel mode.
  * Look in process.c for details of kernel stack layout
  */
-#define TSK_K_ESP(tsk)         (tsk->thread.ksp)
+#define TSK_K_ESP(tsk)         (task_thread_info(tsk)->ksp)
 
 #define TSK_K_REG(tsk, off)    (*((unsigned long *)(TSK_K_ESP(tsk) + \
                                        sizeof(struct callee_regs) + off)))
index 5869a74..4a2b30f 100644 (file)
 
 #ifndef __ASSEMBLY__
 
+typedef union {
+       struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned long state:8, vec:8, cause:8, param:8;
+#else
+               unsigned long param:8, cause:8, vec:8, state:8;
+#endif
+       };
+       unsigned long full;
+} ecr_reg;
+
 /* THE pt_regs: Defines how regs are saved during entry into kernel */
 
 #ifdef CONFIG_ISA_ARCOMPACT
@@ -40,23 +51,10 @@ struct pt_regs {
         *      Last word used by Linux for extra state mgmt (syscall-restart)
         * For interrupts, use artificial ECR values to note current prio-level
         */
-       union {
-               struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-                       unsigned long state:8, ecr_vec:8,
-                                     ecr_cause:8, ecr_param:8;
-#else
-                       unsigned long ecr_param:8, ecr_cause:8,
-                                     ecr_vec:8, state:8;
-#endif
-               };
-               unsigned long event;
-       };
-
-       unsigned long user_r25;
+       ecr_reg ecr;
 };
 
-#define MAX_REG_OFFSET offsetof(struct pt_regs, user_r25)
+#define MAX_REG_OFFSET offsetof(struct pt_regs, ecr)
 
 #else
 
@@ -64,28 +62,14 @@ struct pt_regs {
 
        unsigned long orig_r0;
 
-       union {
-               struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-                       unsigned long state:8, ecr_vec:8,
-                                     ecr_cause:8, ecr_param:8;
-#else
-                       unsigned long ecr_param:8, ecr_cause:8,
-                                     ecr_vec:8, state:8;
-#endif
-               };
-               unsigned long event;
-       };
-
-       unsigned long bta;      /* bta_l1, bta_l2, erbta */
+       ecr_reg ecr;            /* Exception Cause Reg */
 
-       unsigned long user_r25;
+       unsigned long bta;      /* erbta */
 
-       unsigned long r26;      /* gp */
        unsigned long fp;
-       unsigned long sp;       /* user/kernel sp depending on where we came from  */
-
-       unsigned long r12, r30;
+       unsigned long r30;
+       unsigned long r12;
+       unsigned long r26;      /* gp */
 
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
        unsigned long r58, r59; /* ACCL/ACCH used by FPU / DSP MPY */
@@ -94,6 +78,8 @@ struct pt_regs {
        unsigned long DSP_CTRL;
 #endif
 
+       unsigned long sp;       /* user/kernel sp depending on entry  */
+
        /*------- Below list auto saved by h/w -----------*/
        unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
 
@@ -134,13 +120,13 @@ struct callee_regs {
 /* return 1 if PC in delay slot */
 #define delay_mode(regs) ((regs->status32 & STATUS_DE_MASK) == STATUS_DE_MASK)
 
-#define in_syscall(regs)    ((regs->ecr_vec == ECR_V_TRAP) && !regs->ecr_param)
-#define in_brkpt_trap(regs) ((regs->ecr_vec == ECR_V_TRAP) && regs->ecr_param)
+#define in_syscall(regs)    ((regs->ecr.vec == ECR_V_TRAP) && !regs->ecr.param)
+#define in_brkpt_trap(regs) ((regs->ecr.vec == ECR_V_TRAP) && regs->ecr.param)
 
 #define STATE_SCALL_RESTARTED  0x01
 
-#define syscall_wont_restart(reg) (reg->state |= STATE_SCALL_RESTARTED)
-#define syscall_restartable(reg) !(reg->state &  STATE_SCALL_RESTARTED)
+#define syscall_wont_restart(regs) (regs->ecr.state |= STATE_SCALL_RESTARTED)
+#define syscall_restartable(regs) !(regs->ecr.state &  STATE_SCALL_RESTARTED)
 
 #define current_pt_regs()                                      \
 ({                                                             \
@@ -181,6 +167,9 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
        return *(unsigned long *)((unsigned long)regs + offset);
 }
 
+extern int syscall_trace_enter(struct pt_regs *);	/* pre-syscall trace hook */
+extern void syscall_trace_exit(struct pt_regs *);	/* post-syscall trace hook */
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PTRACE_H */
index 028a8cf..1c6db59 100644 (file)
@@ -35,11 +35,11 @@ long __init arc_get_mem_sz(void);
 #define IS_AVAIL3(v, v2, s)    IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
 
 extern void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_mmu_bcr(void);
+extern int arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
 
 extern void arc_cache_init(void);
-extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_cache_bcr(void);
+extern int arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
+
+extern void __init handle_uboot_args(void);
 
 #endif /* __ASMARC_SETUP_H */
index d856491..e0913f5 100644 (file)
@@ -29,6 +29,8 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void __init smp_init_cpus(void);
 extern void first_lines_of_secondary(void);
 extern const char *arc_platform_smp_cpuinfo(void);
+extern void arc_platform_smp_wait_to_boot(int);
+extern void start_kernel_secondary(void);
 
 /*
  * API expected BY platform smp code (FROM arch smp code)
index 6ba7fe4..4c530cf 100644 (file)
  */
 struct thread_info {
        unsigned long flags;            /* low level flags */
+       unsigned long ksp;              /* kernel mode stack top in __switch_to */
        int preempt_count;              /* 0 => preemptable, <0 => BUG */
-       struct task_struct *task;       /* main task structure */
-       __u32 cpu;                      /* current CPU */
+       int cpu;                        /* current CPU */
        unsigned long thr_ptr;          /* TLS ptr */
+       struct task_struct *task;       /* main task structure */
 };
 
 /*
- * macros/functions for gaining access to the thread information structure
- *
- * preempt_count needs to be 1 initially, until the scheduler is functional.
+ * initialize thread_info for any @tsk
+ *  - this is not related to init_task per se
  */
 #define INIT_THREAD_INFO(tsk)                  \
 {                                              \
index 9971247..1e8809e 100644 (file)
@@ -146,8 +146,9 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
        if (n == 0)
                return 0;
 
-       /* unaligned */
-       if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+       /* fallback for unaligned access when hardware doesn't support it */
+       if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+            (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
 
                unsigned char tmp;
 
@@ -373,8 +374,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
        if (n == 0)
                return 0;
 
-       /* unaligned */
-       if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+       /* fallback for unaligned access when hardware doesn't support it */
+       if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+            (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
 
                unsigned char tmp;
 
@@ -584,7 +586,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
        return res;
 }
 
-static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
+static inline unsigned long __clear_user(void __user *to, unsigned long n)
 {
        long res = n;
        unsigned char *d_char = to;
@@ -626,17 +628,10 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
        return res;
 }
 
-#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
 #define INLINE_COPY_TO_USER
 #define INLINE_COPY_FROM_USER
 
-#define __clear_user(d, n)             __arc_clear_user(d, n)
-#else
-extern unsigned long arc_clear_user_noinline(void __user *to,
-               unsigned long n);
-#define __clear_user(d, n)             arc_clear_user_noinline(d, n)
-#endif
+#define __clear_user                   __clear_user
 
 #include <asm-generic/uaccess.h>
 
index 0723d88..95fbf93 100644 (file)
@@ -5,6 +5,8 @@
 
 obj-y  := head.o arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
 obj-y  += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
+obj-y  += ctx_sw_asm.o
+
 obj-$(CONFIG_ISA_ARCOMPACT)            += entry-compact.o intc-compact.o
 obj-$(CONFIG_ISA_ARCV2)                        += entry-arcv2.o intc-arcv2.o
 
@@ -24,11 +26,4 @@ ifdef CONFIG_ISA_ARCOMPACT
 CFLAGS_fpu.o   += -mdpfp
 endif
 
-ifdef CONFIG_ARC_DW2_UNWIND
-CFLAGS_ctx_sw.o += -fno-omit-frame-pointer
-obj-y += ctx_sw.o
-else
-obj-y += ctx_sw_asm.o
-endif
-
 extra-y := vmlinux.lds
index 0e88403..f77deb7 100644 (file)
@@ -20,13 +20,13 @@ int main(void)
 
        BLANK();
 
-       DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
        DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg));
        DEFINE(THREAD_FAULT_ADDR,
               offsetof(struct thread_struct, fault_address));
 
        BLANK();
 
+       DEFINE(THREAD_INFO_KSP, offsetof(struct thread_info, ksp));
        DEFINE(THREAD_INFO_FLAGS, offsetof(struct thread_info, flags));
        DEFINE(THREAD_INFO_PREEMPT_COUNT,
               offsetof(struct thread_info, preempt_count));
@@ -46,7 +46,8 @@ int main(void)
        BLANK();
 
        DEFINE(PT_status32, offsetof(struct pt_regs, status32));
-       DEFINE(PT_event, offsetof(struct pt_regs, event));
+       DEFINE(PT_event, offsetof(struct pt_regs, ecr));
+       DEFINE(PT_bta, offsetof(struct pt_regs, bta));
        DEFINE(PT_sp, offsetof(struct pt_regs, sp));
        DEFINE(PT_r0, offsetof(struct pt_regs, r0));
        DEFINE(PT_r1, offsetof(struct pt_regs, r1));
@@ -61,13 +62,9 @@ int main(void)
        DEFINE(PT_r26, offsetof(struct pt_regs, r26));
        DEFINE(PT_ret, offsetof(struct pt_regs, ret));
        DEFINE(PT_blink, offsetof(struct pt_regs, blink));
+       OFFSET(PT_fp, pt_regs, fp);
        DEFINE(PT_lpe, offsetof(struct pt_regs, lp_end));
        DEFINE(PT_lpc, offsetof(struct pt_regs, lp_count));
-       DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
-
-       DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
-       DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
-
 #ifdef CONFIG_ISA_ARCV2
        OFFSET(PT_r12, pt_regs, r12);
        OFFSET(PT_r30, pt_regs, r30);
@@ -80,5 +77,8 @@ int main(void)
        OFFSET(PT_DSP_CTRL, pt_regs, DSP_CTRL);
 #endif
 
+       DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
+       DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
+
        return 0;
 }
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
deleted file mode 100644 (file)
index 1a76f2d..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * Vineetg: Aug 2009
- *  -"C" version of lowest level context switch asm macro called by schedular
- *   gcc doesn't generate the dward CFI info for hand written asm, hence can't
- *   backtrace out of it (e.g. tasks sleeping in kernel).
- *   So we cheat a bit by writing almost similar code in inline-asm.
- *  -This is a hacky way of doing things, but there is no other simple way.
- *   I don't want/intend to extend unwinding code to understand raw asm
- */
-
-#include <asm/asm-offsets.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-
-#define KSP_WORD_OFF   ((TASK_THREAD + THREAD_KSP) / 4)
-
-struct task_struct *__sched
-__switch_to(struct task_struct *prev_task, struct task_struct *next_task)
-{
-       unsigned int tmp;
-       unsigned int prev = (unsigned int)prev_task;
-       unsigned int next = (unsigned int)next_task;
-
-       __asm__ __volatile__(
-               /* FP/BLINK save generated by gcc (standard function prologue */
-               "st.a    r13, [sp, -4]   \n\t"
-               "st.a    r14, [sp, -4]   \n\t"
-               "st.a    r15, [sp, -4]   \n\t"
-               "st.a    r16, [sp, -4]   \n\t"
-               "st.a    r17, [sp, -4]   \n\t"
-               "st.a    r18, [sp, -4]   \n\t"
-               "st.a    r19, [sp, -4]   \n\t"
-               "st.a    r20, [sp, -4]   \n\t"
-               "st.a    r21, [sp, -4]   \n\t"
-               "st.a    r22, [sp, -4]   \n\t"
-               "st.a    r23, [sp, -4]   \n\t"
-               "st.a    r24, [sp, -4]   \n\t"
-#ifndef CONFIG_ARC_CURR_IN_REG
-               "st.a    r25, [sp, -4]   \n\t"
-#else
-               "sub     sp, sp, 4      \n\t"   /* usual r25 placeholder */
-#endif
-
-               /* set ksp of outgoing task in tsk->thread.ksp */
-#if KSP_WORD_OFF <= 255
-               "st.as   sp, [%3, %1]    \n\t"
-#else
-               /*
-                * Workaround for NR_CPUS=4k
-                * %1 is bigger than 255 (S9 offset for st.as)
-                */
-               "add2    r24, %3, %1     \n\t"
-               "st      sp, [r24]       \n\t"
-#endif
-
-               /*
-                * setup _current_task with incoming tsk.
-                * optionally, set r25 to that as well
-                * For SMP extra work to get to &_current_task[cpu]
-                * (open coded SET_CURR_TASK_ON_CPU)
-                */
-#ifndef CONFIG_SMP
-               "st  %2, [@_current_task]       \n\t"
-#else
-               "lr   r24, [identity]           \n\t"
-               "lsr  r24, r24, 8               \n\t"
-               "bmsk r24, r24, 7               \n\t"
-               "add2 r24, @_current_task, r24  \n\t"
-               "st   %2,  [r24]                \n\t"
-#endif
-#ifdef CONFIG_ARC_CURR_IN_REG
-               "mov r25, %2   \n\t"
-#endif
-
-               /* get ksp of incoming task from tsk->thread.ksp */
-               "ld.as  sp, [%2, %1]   \n\t"
-
-               /* start loading it's CALLEE reg file */
-
-#ifndef CONFIG_ARC_CURR_IN_REG
-               "ld.ab   r25, [sp, 4]   \n\t"
-#else
-               "add    sp, sp, 4       \n\t"
-#endif
-               "ld.ab   r24, [sp, 4]   \n\t"
-               "ld.ab   r23, [sp, 4]   \n\t"
-               "ld.ab   r22, [sp, 4]   \n\t"
-               "ld.ab   r21, [sp, 4]   \n\t"
-               "ld.ab   r20, [sp, 4]   \n\t"
-               "ld.ab   r19, [sp, 4]   \n\t"
-               "ld.ab   r18, [sp, 4]   \n\t"
-               "ld.ab   r17, [sp, 4]   \n\t"
-               "ld.ab   r16, [sp, 4]   \n\t"
-               "ld.ab   r15, [sp, 4]   \n\t"
-               "ld.ab   r14, [sp, 4]   \n\t"
-               "ld.ab   r13, [sp, 4]   \n\t"
-
-               /* last (ret value) = prev : although for ARC it mov r0, r0 */
-               "mov     %0, %3        \n\t"
-
-               /* FP/BLINK restore generated by gcc (standard func epilogue */
-
-               : "=r"(tmp)
-               : "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
-               : "blink"
-       );
-
-       return (struct task_struct *)tmp;
-}
index 02c4614..48e1f21 100644 (file)
 #include <asm/entry.h>       /* For the SAVE_* macros */
 #include <asm/asm-offsets.h>
 
-#define KSP_WORD_OFF   ((TASK_THREAD + THREAD_KSP) / 4)
-
-;################### Low Level Context Switch ##########################
+; IN
+;  - r0: prev task (also current)
+;  - r1: next task
+; OUT
+;  - r0: prev task (so r0 not touched)
 
        .section .sched.text,"ax",@progbits
-       .align 4
-       .global __switch_to
-       .type   __switch_to, @function
-__switch_to:
-       CFI_STARTPROC
-
-       /* Save regs on kernel mode stack of task */
-       st.a    blink, [sp, -4]
-       st.a    fp, [sp, -4]
-       SAVE_CALLEE_SAVED_KERNEL
+ENTRY_CFI(__switch_to)
 
-       /* Save the now KSP in task->thread.ksp */
-#if KSP_WORD_OFF  <= 255
-       st.as  sp, [r0, KSP_WORD_OFF]
-#else
-       /* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
-       add2    r24, r0, KSP_WORD_OFF
-       st      sp, [r24]
-#endif
-       /*
-       * Return last task in r0 (return reg)
-       * On ARC, Return reg = First Arg reg = r0.
-       * Since we already have last task in r0,
-       * don't need to do anything special to return it
-       */
+       /* save kernel stack frame regs of @prev task */
+       push    blink
+       CFI_DEF_CFA_OFFSET 4
+       CFI_OFFSET r31, -4
+
+       push    fp
+       CFI_DEF_CFA_OFFSET 8
+       CFI_OFFSET r27, -8
+
+       mov     fp, sp
+       CFI_DEF_CFA_REGISTER r27
+
+       /* kernel mode callee regs of @prev */
+       SAVE_CALLEE_SAVED_KERNEL
 
        /*
-        * switch to new task, contained in r1
-        * Temp reg r3 is required to get the ptr to store val
+        * save final SP to @prev->thread_info.ksp
+        * @prev is "current" so thread_info derived from SP
         */
-       SET_CURR_TASK_ON_CPU  r1, r3
+       GET_CURR_THR_INFO_FROM_SP  r10
+       st      sp,  [r10, THREAD_INFO_KSP]
+
+       /* update @next in _current_task[] and GP register caching it */
+       SET_CURR_TASK_ON_CPU  r1, r10
 
-       /* reload SP with kernel mode stack pointer in task->thread.ksp */
-       ld.as  sp, [r1, (TASK_THREAD + THREAD_KSP)/4]
+       /* load SP from @next->thread_info.ksp */
+       ld      r10, [r1, TASK_THREAD_INFO]
+       ld      sp,  [r10, THREAD_INFO_KSP]
 
-       /* restore the registers */
+       /* restore callee regs, stack frame regs of @next */
        RESTORE_CALLEE_SAVED_KERNEL
-       ld.ab   fp, [sp, 4]
-       ld.ab   blink, [sp, 4]
-       j       [blink]
 
+       pop     fp
+       CFI_RESTORE r27
+       CFI_DEF_CFA r28, 4
+
+       pop     blink
+       CFI_RESTORE r31
+       CFI_DEF_CFA_OFFSET 0
+
+       j      [blink]
 END_CFI(__switch_to)
index 721d465..4c9e614 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <asm/mach_desc.h>
+#include <asm/serial.h>
 
 #ifdef CONFIG_SERIAL_EARLYCON
 
index a7e6a21..2e49c81 100644 (file)
@@ -125,11 +125,6 @@ ENTRY(mem_service)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        bl  do_memory_error
        b   ret_from_exception
 END(mem_service)
@@ -138,11 +133,6 @@ ENTRY(EV_Misaligned)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]   ; Faulting Data address
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        SAVE_CALLEE_SAVED_USER
        mov r2, sp              ; callee_regs
 
@@ -163,11 +153,6 @@ ENTRY(EV_TLBProtV)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]   ; Faulting Data address
-       mov r1, sp      ; pt_regs
-
-       FAKE_RET_FROM_EXCPN
-
        mov blink, ret_from_exception
        b   do_page_fault
 
index 5cb0cd7..774c03c 100644 (file)
@@ -254,18 +254,7 @@ END(handle_interrupt_level1)
 
 ENTRY(EV_TLBProtV)
 
-       EXCEPTION_PROLOGUE
-
-       mov r2, r10     ; ECR set into r10 already
-       lr  r0, [efa]   ; Faulting Data address (not part of pt_regs saved above)
-
-       ; Exception auto-disables further Intr/exceptions.
-       ; Re-enable them by pretending to return from exception
-       ; (so rest of handler executes in pure K mode)
-
-       FAKE_RET_FROM_EXCPN
-
-       mov   r1, sp    ; Handle to pt_regs
+       EXCEPTION_PROLOGUE      ; ECR returned in r10
 
        ;------ (5) Type of Protection Violation? ----------
        ;
@@ -273,8 +262,7 @@ ENTRY(EV_TLBProtV)
        ;   -Access Violation   : 00_23_(00|01|02|03)_00
        ;                                x  r  w  r+w
        ;   -Unaligned Access   : 00_23_04_00
-       ;
-       bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
+       bbit1 r10, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
 
        ;========= (6a) Access Violation Processing ========
        bl  do_page_fault
@@ -303,9 +291,6 @@ END(EV_TLBProtV)
 ENTRY(call_do_page_fault)
 
        EXCEPTION_PROLOGUE
-       lr  r0, [efa]   ; Faulting Data address
-       mov   r1, sp
-       FAKE_RET_FROM_EXCPN
 
        mov blink, ret_from_exception
        b  do_page_fault
index 54e91df..089f668 100644 (file)
@@ -80,11 +80,6 @@ ENTRY(instr_service)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        bl  do_insterror_or_kprobe
        b   ret_from_exception
 END(instr_service)
@@ -95,16 +90,15 @@ END(instr_service)
 
 ENTRY(EV_MachineCheck)
 
-       EXCEPTION_PROLOGUE
+       EXCEPTION_PROLOGUE_KEEP_AE      ; ECR returned in r10
 
-       lr  r2, [ecr]
        lr  r0, [efa]
        mov r1, sp
 
        ; MC excpetions disable MMU
        ARC_MMU_REENABLE r3
 
-       lsr     r3, r2, 8
+       lsr     r3, r10, 8
        bmsk    r3, r3, 7
        brne    r3, ECR_C_MCHK_DUP_TLB, 1f
 
@@ -129,11 +123,6 @@ ENTRY(EV_PrivilegeV)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        bl  do_privilege_fault
        b   ret_from_exception
 END(EV_PrivilegeV)
@@ -145,11 +134,6 @@ ENTRY(EV_Extension)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        bl  do_extension_fault
        b   ret_from_exception
 END(EV_Extension)
@@ -160,20 +144,19 @@ END(EV_Extension)
 ; syscall Tracing
 ; ---------------------------------------------
 tracesys:
-       ; save EFA in case tracer wants the PC of traced task
-       ; using ERET won't work since next-PC has already committed
+       ; safekeep EFA (r12) if syscall tracer wanted PC
+       ; for traps, ERET is pre-commit so points to next-PC
        GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r11
        st  r12, [r11, THREAD_FAULT_ADDR]       ; thread.fault_address
 
-       ; PRE Sys Call Ptrace hook
-       mov r0, sp                      ; pt_regs needed
-       bl  @syscall_trace_entry
+       ; PRE syscall trace hook
+       mov r0, sp                              ; pt_regs
+       bl  @syscall_trace_enter
 
        ; Tracing code now returns the syscall num (orig or modif)
        mov r8, r0
 
        ; Do the Sys Call as we normally would.
-       ; Validate the Sys Call number
        cmp     r8,  NR_syscalls - 1
        mov.hi  r0, -ENOSYS
        bhi     tracesys_exit
@@ -190,37 +173,36 @@ tracesys:
        ld  r6, [sp, PT_r6]
        ld  r7, [sp, PT_r7]
        ld.as   r9, [sys_call_table, r8]
-       jl      [r9]        ; Entry into Sys Call Handler
+       jl      [r9]
 
 tracesys_exit:
-       st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
+       st  r0, [sp, PT_r0]
 
-       ;POST Sys Call Ptrace Hook
+       ; POST syscall trace hook
        mov r0, sp              ; pt_regs needed
        bl  @syscall_trace_exit
-       b   ret_from_exception ; NOT ret_from_system_call at is saves r0 which
-       ; we'd done before calling post hook above
+
+       ; don't call ret_from_system_call as it saves r0, already done above
+       b   ret_from_exception
 
 ; ---------------------------------------------
 ; Breakpoint TRAP
 ; ---------------------------------------------
 trap_with_param:
        mov r0, r12     ; EFA in case ptracer/gdb wants stop_pc
-       mov r1, sp
+       mov r1, sp      ; pt_regs
 
-       ; Save callee regs in case gdb wants to have a look
-       ; SP will grow up by size of CALLEE Reg-File
-       ; NOTE: clobbers r12
+       ; save callee regs in case tracer/gdb wants to peek
        SAVE_CALLEE_SAVED_USER
 
-       ; save location of saved Callee Regs @ thread_struct->pc
+       ; safekeep ref to callee regs
        GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r10
        st  sp, [r10, THREAD_CALLEE_REG]
 
-       ; Call the trap handler
+       ; call the non syscall trap handler
        bl  do_non_swi_trap
 
-       ; unwind stack to discard Callee saved Regs
+       ; unwind stack to discard callee regs
        DISCARD_CALLEE_SAVED_USER
 
        b   ret_from_exception
@@ -232,37 +214,33 @@ trap_with_param:
 
 ENTRY(EV_Trap)
 
-       EXCEPTION_PROLOGUE
+       EXCEPTION_PROLOGUE_KEEP_AE
 
        lr  r12, [efa]
 
        FAKE_RET_FROM_EXCPN
 
-       ;============ TRAP 1   :breakpoints
-       ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR)
+       ;============ TRAP N : breakpoints, kprobes etc
        bmsk.f 0, r10, 7
        bnz    trap_with_param
 
-       ;============ TRAP  (no param): syscall top level
+       ;============ TRAP 0 (no param): syscall
 
-       ; If syscall tracing ongoing, invoke pre-post-hooks
+       ; if syscall tracing is ongoing, invoke pre/post hooks around the syscall
        GET_CURR_THR_INFO_FLAGS   r10
        and.f 0, r10, _TIF_SYSCALL_WORK
        bnz   tracesys  ; this never comes back
 
        ;============ Normal syscall case
 
-       ; syscall num shd not exceed the total system calls avail
        cmp     r8,  NR_syscalls - 1
        mov.hi  r0, -ENOSYS
        bhi     .Lret_from_system_call
 
-       ; Offset into the syscall_table and call handler
        ld.as   r9,[sys_call_table, r8]
-       jl      [r9]        ; Entry into Sys Call Handler
+       jl      [r9]
 
 .Lret_from_system_call:
-
        st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
 
        ; fall through to ret_from_exception
@@ -318,7 +296,7 @@ resume_user_mode_begin:
        ;      tracer might call PEEKUSR(CALLEE reg)
        ;
        ; NOTE: SP will grow up by size of CALLEE Reg-File
-       SAVE_CALLEE_SAVED_USER          ; clobbers r12
+       SAVE_CALLEE_SAVED_USER
 
        ; save location of saved Callee Regs @ thread_struct->callee
        GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r10
index 5cda19d..6788987 100644 (file)
@@ -108,7 +108,7 @@ static void arcv2_irq_unmask(struct irq_data *data)
        write_aux_reg(AUX_IRQ_ENABLE, 1);
 }
 
-void arcv2_irq_enable(struct irq_data *data)
+static void arcv2_irq_enable(struct irq_data *data)
 {
        /* set default priority */
        write_aux_reg(AUX_IRQ_SELECT, data->hwirq);
index 345a000..4f2b595 100644 (file)
@@ -175,7 +175,7 @@ void kgdb_trap(struct pt_regs *regs)
         * with trap_s 4 (compiled) breakpoints, continuation needs to
         * start after the breakpoint.
         */
-       if (regs->ecr_param == 3)
+       if (regs->ecr.param == 3)
                instruction_pointer(regs) -= BREAK_INSTR_SIZE;
 
        kgdb_handle_exception(1, SIGTRAP, 0, regs);
index f9fdb55..55373ca 100644 (file)
@@ -165,8 +165,6 @@ static void mcip_probe_n_setup(void)
                IS_AVAIL1(mp.idu, "IDU "),
                IS_AVAIL1(mp.dbg, "DEBUG "),
                IS_AVAIL1(mp.gfrc, "GFRC"));
-
-       cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
 }
 
 struct plat_smp_ops plat_smp_ops = {
index 980b71d..186ceab 100644 (file)
@@ -141,7 +141,7 @@ asmlinkage void ret_from_fork(void);
  * |    unused      |
  * |                |
  * ------------------
- * |     r25        |   <==== top of Stack (thread.ksp)
+ * |     r25        |   <==== top of Stack (thread_info.ksp)
  * ~                ~
  * |    --to--      |   (CALLEE Regs of kernel mode)
  * |     r13        |
@@ -162,7 +162,6 @@ asmlinkage void ret_from_fork(void);
  * |      SP        |
  * |    orig_r0     |
  * |    event/ECR   |
- * |    user_r25    |
  * ------------------  <===== END of PAGE
  */
 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
@@ -182,14 +181,14 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
        c_callee = ((struct callee_regs *)childksp) - 1;
 
        /*
-        * __switch_to() uses thread.ksp to start unwinding stack
+        * __switch_to() uses thread_info.ksp to start unwinding stack
         * For kernel threads we don't need to create callee regs, the
         * stack layout nevertheless needs to remain the same.
         * Also, since __switch_to anyways unwinds callee regs, we use
         * this to populate kernel thread entry-pt/args into callee regs,
         * so that ret_from_kernel_thread() becomes simpler.
         */
-       p->thread.ksp = (unsigned long)c_callee;        /* THREAD_KSP */
+       task_thread_info(p)->ksp = (unsigned long)c_callee;     /* THREAD_INFO_KSP */
 
        /* __switch_to expects FP(0), BLINK(return addr) at top */
        childksp[0] = 0;                        /* fp */
@@ -243,16 +242,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
         */
        c_callee->r25 = task_thread_info(p)->thr_ptr;
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       /*
-        * setup usermode thread pointer #2:
-        * however for this special use of r25 in kernel, __switch_to() sets
-        * r25 for kernel needs and only in the final return path is usermode
-        * r25 setup, from pt_regs->user_r25. So set that up as well
-        */
-       c_regs->user_r25 = c_callee->r25;
-#endif
-
        return 0;
 }
 
index 2abdcd9..e0c233c 100644 (file)
@@ -46,8 +46,7 @@ static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_NAME(r0),
        REG_OFFSET_NAME(sp),
        REG_OFFSET_NAME(orig_r0),
-       REG_OFFSET_NAME(event),
-       REG_OFFSET_NAME(user_r25),
+       REG_OFFSET_NAME(ecr),
        REG_OFFSET_END,
 };
 
@@ -55,9 +54,8 @@ static const struct pt_regs_offset regoffset_table[] = {
 
 static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_NAME(orig_r0),
-       REG_OFFSET_NAME(event),
+       REG_OFFSET_NAME(ecr),
        REG_OFFSET_NAME(bta),
-       REG_OFFSET_NAME(user_r25),
        REG_OFFSET_NAME(r26),
        REG_OFFSET_NAME(fp),
        REG_OFFSET_NAME(sp),
@@ -341,7 +339,7 @@ long arch_ptrace(struct task_struct *child, long request,
        return ret;
 }
 
-asmlinkage int syscall_trace_entry(struct pt_regs *regs)
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 {
        if (test_thread_flag(TIF_SYSCALL_TRACE))
                if (ptrace_report_syscall_entry(regs))
index 41f07b3..4dcf858 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/mach_desc.h>
 #include <asm/smp.h>
 #include <asm/dsp-impl.h>
+#include <soc/arc/mcip.h>
 
 #define FIX_PTR(x)  __asm__ __volatile__(";" : "+r"(x))
 
@@ -43,19 +44,22 @@ const struct machine_desc *machine_desc;
 
 struct task_struct *_current_task[NR_CPUS];    /* For stack switching */
 
-struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
+struct cpuinfo_arc {
+       int arcver;
+       unsigned int t0:1, t1:1;
+       struct {
+               unsigned long base;
+               unsigned int sz;
+       } iccm, dccm;
+};
+
+#ifdef CONFIG_ISA_ARCV2
 
-static const struct id_to_str arc_legacy_rel[] = {
+static const struct id_to_str arc_hs_rel[] = {
        /* ID.ARCVER,   Release */
-#ifdef CONFIG_ISA_ARCOMPACT
-       { 0x34,         "R4.10"},
-       { 0x35,         "R4.11"},
-#else
        { 0x51,         "R2.0" },
        { 0x52,         "R2.1" },
        { 0x53,         "R3.0" },
-#endif
-       { 0x00,         NULL   }
 };
 
 static const struct id_to_str arc_hs_ver54_rel[] = {
@@ -66,323 +70,296 @@ static const struct id_to_str arc_hs_ver54_rel[] = {
        {  3,           "R4.00a"},
        {  0xFF,        NULL   }
 };
+#endif
 
-static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+static int
+arcompact_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
 {
-       if (is_isa_arcompact()) {
-               struct bcr_iccm_arcompact iccm;
-               struct bcr_dccm_arcompact dccm;
+       int n = 0;
+#ifdef CONFIG_ISA_ARCOMPACT
+       char *cpu_nm, *isa_nm = "ARCompact";
+       struct bcr_fp_arcompact fpu_sp, fpu_dp;
+       int atomic = 0, be, present;
+       int bpu_full, bpu_cache, bpu_pred;
+       struct bcr_bpu_arcompact bpu;
+       struct bcr_iccm_arcompact iccm;
+       struct bcr_dccm_arcompact dccm;
+       struct bcr_generic isa;
 
-               READ_BCR(ARC_REG_ICCM_BUILD, iccm);
-               if (iccm.ver) {
-                       cpu->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */
-                       cpu->iccm.base_addr = iccm.base << 16;
-               }
+       READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
 
-               READ_BCR(ARC_REG_DCCM_BUILD, dccm);
-               if (dccm.ver) {
-                       unsigned long base;
-                       cpu->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */
+       if (!isa.ver)   /* ISA BCR absent, use Kconfig info */
+               atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+       else {
+               /* ARC700_BUILD only has 2 bits of isa info */
+               atomic = isa.info & 1;
+       }
 
-                       base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
-                       cpu->dccm.base_addr = base & ~0xF;
-               }
-       } else {
-               struct bcr_iccm_arcv2 iccm;
-               struct bcr_dccm_arcv2 dccm;
-               unsigned long region;
-
-               READ_BCR(ARC_REG_ICCM_BUILD, iccm);
-               if (iccm.ver) {
-                       cpu->iccm.sz = 256 << iccm.sz00;        /* 512B to 16M */
-                       if (iccm.sz00 == 0xF && iccm.sz01 > 0)
-                               cpu->iccm.sz <<= iccm.sz01;
-
-                       region = read_aux_reg(ARC_REG_AUX_ICCM);
-                       cpu->iccm.base_addr = region & 0xF0000000;
-               }
+       be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
 
-               READ_BCR(ARC_REG_DCCM_BUILD, dccm);
-               if (dccm.ver) {
-                       cpu->dccm.sz = 256 << dccm.sz0;
-                       if (dccm.sz0 == 0xF && dccm.sz1 > 0)
-                               cpu->dccm.sz <<= dccm.sz1;
+       if (info->arcver < 0x34)
+               cpu_nm = "ARC750";
+       else
+               cpu_nm = "ARC770";
 
-                       region = read_aux_reg(ARC_REG_AUX_DCCM);
-                       cpu->dccm.base_addr = region & 0xF0000000;
-               }
-       }
-}
+       n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s%s%s\n",
+                      c, cpu_nm, isa_nm,
+                      IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+                      IS_AVAIL1(be, "[Big-Endian]"));
 
-static void decode_arc_core(struct cpuinfo_arc *cpu)
-{
-       struct bcr_uarch_build_arcv2 uarch;
-       const struct id_to_str *tbl;
-
-       if (cpu->core.family < 0x54) { /* includes arc700 */
+       READ_BCR(ARC_REG_FP_BCR, fpu_sp);
+       READ_BCR(ARC_REG_DPFP_BCR, fpu_dp);
 
-               for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) {
-                       if (cpu->core.family == tbl->id) {
-                               cpu->release = tbl->str;
-                               break;
-                       }
-               }
+       if (fpu_sp.ver | fpu_dp.ver)
+               n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
+                              IS_AVAIL1(fpu_sp.ver, "SP "),
+                              IS_AVAIL1(fpu_dp.ver, "DP "));
 
-               if (is_isa_arcompact())
-                       cpu->name = "ARC700";
-               else if (tbl->str)
-                       cpu->name = "HS38";
-               else
-                       cpu->name = cpu->release = "Unknown";
+       READ_BCR(ARC_REG_BPU_BCR, bpu);
+       bpu_full = bpu.fam ? 1 : 0;
+       bpu_cache = bpu.ent ? 256 << (bpu.ent - 1) : 0; /* ent == 0: avoid shift by -1 */
+       bpu_pred = bpu.ent ? 256 << (bpu.ent - 1) : 0;  /* same guard as the cache size */
 
-               return;
+       n += scnprintf(buf + n, len - n,
+                       "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
+                       IS_AVAIL1(bpu_full, "full"),
+                       IS_AVAIL1(!bpu_full, "partial"),
+                       bpu_cache, bpu_pred);
+
+       READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+       if (iccm.ver) {
+               info->iccm.sz = 4096 << iccm.sz;        /* 8K to 512K */
+               info->iccm.base = iccm.base << 16;
        }
 
-       /*
-        * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until
-        * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent
-        * releases only update it.
-        */
-       READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
-
-       if (uarch.prod == 4) {
-               cpu->name = "HS48";
-               cpu->extn.dual = 1;
+       READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+       if (dccm.ver) {
+               unsigned long base;
+               info->dccm.sz = 2048 << dccm.sz;        /* 2K to 256K */
 
-       } else {
-               cpu->name = "HS38";
+               base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+               info->dccm.base = base & ~0xF;
        }
 
-       for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) {
-               if (uarch.maj == tbl->id) {
-                       cpu->release = tbl->str;
-                       break;
-               }
-       }
+       /* ARCompact ISA specific sanity checks */
+       present = fpu_dp.ver;   /* SP has no arch visible regs */
+       CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present);
+#endif
+       return n;
+
 }
 
-static void read_arc_build_cfg_regs(void)
+static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
 {
-       struct bcr_timer timer;
-       struct bcr_generic bcr;
-       struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+       int n = 0;
+#ifdef CONFIG_ISA_ARCV2
+       const char *release = "Unknown", *cpu_nm, *isa_nm = "ARCv2";    /* release: fallback if no table match */
+       int dual_issue = 0, dual_enb = 0, mpy_opt, present;
+       int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk;
+       char mpy_nm[16], lpb_nm[32] = "";       /* lpb_nm stays empty when no loop buffer */
        struct bcr_isa_arcv2 isa;
-       struct bcr_actionpoint ap;
-
-       FIX_PTR(cpu);
+       struct bcr_mpy mpy;
+       struct bcr_fp_arcv2 fpu;
+       struct bcr_bpu_arcv2 bpu;
+       struct bcr_lpb lpb;
+       struct bcr_iccm_arcv2 iccm;
+       struct bcr_dccm_arcv2 dccm;
+       struct bcr_erp erp;
 
-       READ_BCR(AUX_IDENTITY, cpu->core);
-       decode_arc_core(cpu);
-
-       READ_BCR(ARC_REG_TIMERS_BCR, timer);
-       cpu->extn.timer0 = timer.t0;
-       cpu->extn.timer1 = timer.t1;
-       cpu->extn.rtc = timer.rtc;
-
-       cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
+       /*
+        * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until
+        * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent
+        * releases only update it.
+        */
 
-       READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
+       cpu_nm = "HS38";
 
-       /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
-       read_decode_ccm_bcr(cpu);
+       if (info->arcver > 0x50 && info->arcver <= 0x53) {
+               release = arc_hs_rel[info->arcver - 0x51].str;
+       } else {
+               const struct id_to_str *tbl;
+               struct bcr_uarch_build uarch;
 
-       read_decode_mmu_bcr();
-       read_decode_cache_bcr();
+               READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
 
-       if (is_isa_arcompact()) {
-               struct bcr_fp_arcompact sp, dp;
-               struct bcr_bpu_arcompact bpu;
-
-               READ_BCR(ARC_REG_FP_BCR, sp);
-               READ_BCR(ARC_REG_DPFP_BCR, dp);
-               cpu->extn.fpu_sp = sp.ver ? 1 : 0;
-               cpu->extn.fpu_dp = dp.ver ? 1 : 0;
-
-               READ_BCR(ARC_REG_BPU_BCR, bpu);
-               cpu->bpu.ver = bpu.ver;
-               cpu->bpu.full = bpu.fam ? 1 : 0;
-               if (bpu.ent) {
-                       cpu->bpu.num_cache = 256 << (bpu.ent - 1);
-                       cpu->bpu.num_pred = 256 << (bpu.ent - 1);
+               for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) {
+                       if (uarch.maj == tbl->id) {
+                               release = tbl->str;
+                               break;
+                       }
                }
-       } else {
-               struct bcr_fp_arcv2 spdp;
-               struct bcr_bpu_arcv2 bpu;
-
-               READ_BCR(ARC_REG_FP_V2_BCR, spdp);
-               cpu->extn.fpu_sp = spdp.sp ? 1 : 0;
-               cpu->extn.fpu_dp = spdp.dp ? 1 : 0;
-
-               READ_BCR(ARC_REG_BPU_BCR, bpu);
-               cpu->bpu.ver = bpu.ver;
-               cpu->bpu.full = bpu.ft;
-               cpu->bpu.num_cache = 256 << bpu.bce;
-               cpu->bpu.num_pred = 2048 << bpu.pte;
-               cpu->bpu.ret_stk = 4 << bpu.rse;
-
-               /* if dual issue hardware, is it enabled ? */
-               if (cpu->extn.dual) {
+               if (uarch.prod == 4) {
                        unsigned int exec_ctrl;
 
+                       cpu_nm = "HS48";
+                       dual_issue = 1;
+                       /* if dual issue hardware, is it enabled ? */
                        READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
-                       cpu->extn.dual_enb = !(exec_ctrl & 1);
+                       dual_enb = !(exec_ctrl & 1);
                }
        }
 
-       READ_BCR(ARC_REG_AP_BCR, ap);
-       if (ap.ver) {
-               cpu->extn.ap_num = 2 << ap.num;
-               cpu->extn.ap_full = !ap.min;
-       }
-
-       READ_BCR(ARC_REG_SMART_BCR, bcr);
-       cpu->extn.smart = bcr.ver ? 1 : 0;
-
-       READ_BCR(ARC_REG_RTT_BCR, bcr);
-       cpu->extn.rtt = bcr.ver ? 1 : 0;
-
        READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
 
-       /* some hacks for lack of feature BCR info in old ARC700 cores */
-       if (is_isa_arcompact()) {
-               if (!isa.ver)   /* ISA BCR absent, use Kconfig info */
-                       cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
-               else {
-                       /* ARC700_BUILD only has 2 bits of isa info */
-                       struct bcr_generic bcr = *(struct bcr_generic *)&isa;
-                       cpu->isa.atomic = bcr.info & 1;
-               }
-
-               cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+       n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
+                      c, cpu_nm, release, isa_nm,
+                      IS_AVAIL1(isa.be, "[Big-Endian]"),
+                      IS_AVAIL3(dual_issue, dual_enb, " Dual-Issue "));
+
+       READ_BCR(ARC_REG_MPY_BCR, mpy);
+       mpy_opt = 2;    /* stock MPY/MPYH */
+       if (mpy.dsp)    /* OPT 7-9 */
+               mpy_opt = mpy.dsp + 6;
+
+       scnprintf(mpy_nm, 16, "mpy[opt %d] ", mpy_opt);
+
+       READ_BCR(ARC_REG_FP_V2_BCR, fpu);
+
+       n += scnprintf(buf + n, len - n, "ISA Extn\t: %s%s%s%s%s%s%s%s%s%s%s\n",
+                      IS_AVAIL2(isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+                      IS_AVAIL2(isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
+                      IS_AVAIL2(isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
+                      IS_AVAIL1(mpy.ver, mpy_nm),
+                      IS_AVAIL1(isa.div_rem, "div_rem "),
+                      IS_AVAIL1((fpu.sp | fpu.dp), "  FPU:"),
+                      IS_AVAIL1(fpu.sp, " sp"),
+                      IS_AVAIL1(fpu.dp, " dp"));
+
+       READ_BCR(ARC_REG_BPU_BCR, bpu);
+       bpu_full = bpu.ft;
+       bpu_cache = 256 << bpu.bce;
+       bpu_pred = 2048 << bpu.pte;
+       bpu_ret_stk = 4 << bpu.rse;
+
+       READ_BCR(ARC_REG_LPB_BUILD, lpb);
+       if (lpb.ver) {
+               unsigned int ctl;
+               ctl = read_aux_reg(ARC_REG_LPB_CTRL);
+
+               scnprintf(lpb_nm, sizeof(lpb_nm), " Loop Buffer:%d %s",
+                         lpb.entries, IS_DISABLED_RUN(!ctl));
+       }
 
-                /* there's no direct way to distinguish 750 vs. 770 */
-               if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3))
-                       cpu->name = "ARC750";
-       } else {
-               cpu->isa = isa;
+       n += scnprintf(buf + n, len - n,
+                       "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d%s\n",
+                       IS_AVAIL1(bpu_full, "full"),
+                       IS_AVAIL1(!bpu_full, "partial"),
+                       bpu_cache, bpu_pred, bpu_ret_stk,
+                       lpb_nm);
+
+       READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+       if (iccm.ver) {
+               unsigned long base;
+               info->iccm.sz = 256 << iccm.sz00;       /* 512B to 16M */
+               if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+                       info->iccm.sz <<= iccm.sz01;
+               base = read_aux_reg(ARC_REG_AUX_ICCM);
+               info->iccm.base = base & 0xF0000000;
        }
-}
 
-static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
-{
-       struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
-       struct bcr_identity *core = &cpu->core;
-       char mpy_opt[16];
-       int n = 0;
+       READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+       if (dccm.ver) {
+               unsigned long base;
+               info->dccm.sz = 256 << dccm.sz0;
+               if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+                       info->dccm.sz <<= dccm.sz1;
+               base = read_aux_reg(ARC_REG_AUX_DCCM);
+               info->dccm.base = base & 0xF0000000;
+       }
 
-       FIX_PTR(cpu);
+       /* Error Protection: ECC/Parity */
+       READ_BCR(ARC_REG_ERP_BUILD, erp);
+       if (erp.ver) {
+               struct ctl_erp ctl;
+               READ_BCR(ARC_REG_ERP_CTRL, ctl);
+               /* inverted bits: 0 means enabled */
+               n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
+                               IS_AVAIL3(erp.ic,  !ctl.dpi, "IC "),
+                               IS_AVAIL3(erp.dc,  !ctl.dpd, "DC "),
+                               IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
+       }
 
-       n += scnprintf(buf + n, len - n,
-                      "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
-                      core->family, core->cpu_id, core->chip_id);
+       /* ARCv2 ISA specific sanity checks */
+       present = fpu.sp | fpu.dp | mpy.dsp;    /* DSP and/or FPU */
+       CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
 
-       n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
-                      cpu_id, cpu->name, cpu->release,
-                      is_isa_arcompact() ? "ARCompact" : "ARCv2",
-                      IS_AVAIL1(cpu->isa.be, "[Big-Endian]"),
-                      IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue "));
+       dsp_config_check();
+#endif
+       return n;
+}
 
-       n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ",
-                      IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
-                      IS_AVAIL1(cpu->extn.timer1, "Timer1 "),
-                      IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
-                      IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT));
+static char *arc_cpu_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
+{
+       struct bcr_identity ident;
+       struct bcr_timer timer;
+       struct bcr_generic bcr;
+       struct mcip_bcr mp;
+       struct bcr_actionpoint ap;
+       unsigned long vec_base;
+       int ap_num, ap_full, smart, rtt, n;
 
-       if (cpu->extn_mpy.ver) {
-               if (is_isa_arcompact()) {
-                       scnprintf(mpy_opt, 16, "mpy");
-               } else {
+       memset(info, 0, sizeof(struct cpuinfo_arc));
 
-                       int opt = 2;    /* stock MPY/MPYH */
+       READ_BCR(AUX_IDENTITY, ident);
+       info->arcver = ident.family;
 
-                       if (cpu->extn_mpy.dsp)  /* OPT 7-9 */
-                               opt = cpu->extn_mpy.dsp + 6;
+       n = scnprintf(buf, len,
+                      "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
+                      ident.family, ident.cpu_id, ident.chip_id);
 
-                       scnprintf(mpy_opt, 16, "mpy[opt %d] ", opt);
-               }
+       if (is_isa_arcompact()) {
+               n += arcompact_mumbojumbo(c, info, buf + n, len - n);
+       } else if (is_isa_arcv2()){
+               n += arcv2_mumbojumbo(c, info, buf + n, len - n);
        }
 
-       n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
-                      IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
-                      IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
-                      IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
-                      IS_AVAIL1(cpu->extn_mpy.ver, mpy_opt),
-                      IS_AVAIL1(cpu->isa.div_rem, "div_rem "));
+       n += arc_mmu_mumbojumbo(c, buf + n, len - n);
+       n += arc_cache_mumbojumbo(c, buf + n, len - n);
 
-       if (cpu->bpu.ver) {
-               n += scnprintf(buf + n, len - n,
-                             "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d",
-                             IS_AVAIL1(cpu->bpu.full, "full"),
-                             IS_AVAIL1(!cpu->bpu.full, "partial"),
-                             cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk);
-
-               if (is_isa_arcv2()) {
-                       struct bcr_lpb lpb;
-
-                       READ_BCR(ARC_REG_LPB_BUILD, lpb);
-                       if (lpb.ver) {
-                               unsigned int ctl;
-                               ctl = read_aux_reg(ARC_REG_LPB_CTRL);
-
-                               n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s",
-                                              lpb.entries,
-                                              IS_DISABLED_RUN(!ctl));
-                       }
-               }
-               n += scnprintf(buf + n, len - n, "\n");
-       }
+       READ_BCR(ARC_REG_TIMERS_BCR, timer);
+       info->t0 = timer.t0;
+       info->t1 = timer.t1;
 
-       return buf;
-}
+       READ_BCR(ARC_REG_MCIP_BCR, mp);
+       vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
 
-static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
-{
-       int n = 0;
-       struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
+       n += scnprintf(buf + n, len - n,
+                      "Timers\t\t: %s%s%s%s%s%s\nVector Table\t: %#lx\n",
+                      IS_AVAIL1(timer.t0, "Timer0 "),
+                      IS_AVAIL1(timer.t1, "Timer1 "),
+                      IS_AVAIL2(timer.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+                      IS_AVAIL2(mp.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+                      vec_base);
 
-       FIX_PTR(cpu);
+       READ_BCR(ARC_REG_AP_BCR, ap);
+       if (ap.ver) {
+               ap_num = 2 << ap.num;
+               ap_full = !ap.min;
+       }
 
-       n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base);
+       READ_BCR(ARC_REG_SMART_BCR, bcr);
+       smart = bcr.ver ? 1 : 0;
 
-       if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
-               n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
-                              IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
-                              IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
+       READ_BCR(ARC_REG_RTT_BCR, bcr);
+       rtt = bcr.ver ? 1 : 0;
 
-       if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) {
+       if (ap.ver | smart | rtt) {
                n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s",
-                              IS_AVAIL1(cpu->extn.smart, "smaRT "),
-                              IS_AVAIL1(cpu->extn.rtt, "RTT "));
-               if (cpu->extn.ap_num) {
+                              IS_AVAIL1(smart, "smaRT "),
+                              IS_AVAIL1(rtt, "RTT "));
+               if (ap.ver) {
                        n += scnprintf(buf + n, len - n, "ActionPoint %d/%s",
-                                      cpu->extn.ap_num,
-                                      cpu->extn.ap_full ? "full":"min");
+                                      ap_num,
+                                      ap_full ? "full":"min");
                }
                n += scnprintf(buf + n, len - n, "\n");
        }
 
-       if (cpu->dccm.sz || cpu->iccm.sz)
-               n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
-                              cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
-                              cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
-
-       if (is_isa_arcv2()) {
-
-               /* Error Protection: ECC/Parity */
-               struct bcr_erp erp;
-               READ_BCR(ARC_REG_ERP_BUILD, erp);
-
-               if (erp.ver) {
-                       struct  ctl_erp ctl;
-                       READ_BCR(ARC_REG_ERP_CTRL, ctl);
-
-                       /* inverted bits: 0 means enabled */
-                       n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
-                               IS_AVAIL3(erp.ic,  !ctl.dpi, "IC "),
-                               IS_AVAIL3(erp.dc,  !ctl.dpd, "DC "),
-                               IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
-               }
-       }
+       if (info->dccm.sz || info->iccm.sz)
+               n += scnprintf(buf + n, len - n,
+                              "Extn [CCM]\t: DCCM @ %lx, %d KB / ICCM: @ %lx, %d KB\n",
+                              info->dccm.base, TO_KB(info->dccm.sz),
+                              info->iccm.base, TO_KB(info->iccm.sz));
 
        return buf;
 }
@@ -401,15 +378,15 @@ void chk_opt_weak(char *opt_name, bool hw_exists, bool opt_ena)
                panic("Disable %s, hardware NOT present\n", opt_name);
 }
 
-static void arc_chk_core_config(void)
+/*
+ * ISA agnostic sanity checks
+ */
+static void arc_chk_core_config(struct cpuinfo_arc *info)
 {
-       struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
-       int present = 0;
-
-       if (!cpu->extn.timer0)
+       if (!info->t0)
                panic("Timer0 is not present!\n");
 
-       if (!cpu->extn.timer1)
+       if (!info->t1)
                panic("Timer1 is not present!\n");
 
 #ifdef CONFIG_ARC_HAS_DCCM
@@ -417,35 +394,17 @@ static void arc_chk_core_config(void)
         * DCCM can be arbit placed in hardware.
         * Make sure it's placement/sz matches what Linux is built with
         */
-       if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr)
+       if ((unsigned int)__arc_dccm_base != info->dccm.base)
                panic("Linux built with incorrect DCCM Base address\n");
 
-       if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz)
+       if (CONFIG_ARC_DCCM_SZ * SZ_1K != info->dccm.sz)
                panic("Linux built with incorrect DCCM Size\n");
 #endif
 
 #ifdef CONFIG_ARC_HAS_ICCM
-       if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz)
+       if (CONFIG_ARC_ICCM_SZ * SZ_1K != info->iccm.sz)
                panic("Linux built with incorrect ICCM Size\n");
 #endif
-
-       /*
-        * FP hardware/software config sanity
-        * -If hardware present, kernel needs to save/restore FPU state
-        * -If not, it will crash trying to save/restore the non-existant regs
-        */
-
-       if (is_isa_arcompact()) {
-               /* only DPDP checked since SP has no arch visible regs */
-               present = cpu->extn.fpu_dp;
-               CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present);
-       } else {
-               /* Accumulator Low:High pair (r58:59) present if DSP MPY or FPU */
-               present = cpu->extn_mpy.dsp | cpu->extn.fpu_sp | cpu->extn.fpu_dp;
-               CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
-
-               dsp_config_check();
-       }
 }
 
 /*
@@ -456,21 +415,19 @@ static void arc_chk_core_config(void)
 
 void setup_processor(void)
 {
+       struct cpuinfo_arc info;
+       int c = smp_processor_id();
        char str[512];
-       int cpu_id = smp_processor_id();
 
-       read_arc_build_cfg_regs();
-       arc_init_IRQ();
+       pr_info("%s", arc_cpu_mumbojumbo(c, &info, str, sizeof(str)));
+       pr_info("%s", arc_platform_smp_cpuinfo());
 
-       pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
+       arc_chk_core_config(&info);
 
+       arc_init_IRQ();
        arc_mmu_init();
        arc_cache_init();
 
-       pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
-       pr_info("%s", arc_platform_smp_cpuinfo());
-
-       arc_chk_core_config();
 }
 
 static inline bool uboot_arg_invalid(unsigned long addr)
@@ -617,6 +574,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        char *str;
        int cpu_id = ptr_to_cpu(v);
        struct device *cpu_dev = get_cpu_device(cpu_id);
+       struct cpuinfo_arc info;
        struct clk *cpu_clk;
        unsigned long freq = 0;
 
@@ -629,7 +587,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        if (!str)
                goto done;
 
-       seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
+       seq_printf(m, arc_cpu_mumbojumbo(cpu_id, &info, str, PAGE_SIZE));
 
        cpu_clk = clk_get(cpu_dev, NULL);
        if (IS_ERR(cpu_clk)) {
@@ -646,9 +604,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                   loops_per_jiffy / (500000 / HZ),
                   (loops_per_jiffy / (5000 / HZ)) % 100);
 
-       seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE));
-       seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE));
-       seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE));
        seq_printf(m, arc_platform_smp_cpuinfo());
 
        free_page((unsigned long)str);
index 3c1590c..0b3bb52 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/sched/task_stack.h>
 
 #include <asm/ucontext.h>
+#include <asm/entry.h>
 
 struct rt_sigframe {
        struct siginfo info;
index 409cfa4..8d9b188 100644 (file)
 #include <linux/export.h>
 #include <linux/of_fdt.h>
 
-#include <asm/processor.h>
-#include <asm/setup.h>
 #include <asm/mach_desc.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
 
 #ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -351,7 +352,7 @@ static inline int __do_IPI(unsigned long msg)
  * arch-common ISR to handle for inter-processor interrupts
  * Has hooks for platform specific IPI
  */
-irqreturn_t do_IPI(int irq, void *dev_id)
+static irqreturn_t do_IPI(int irq, void *dev_id)
 {
        unsigned long pending;
        unsigned long __maybe_unused copy;
index 5372dc0..ea99c06 100644 (file)
@@ -29,6 +29,7 @@
 
 #include <asm/arcregs.h>
 #include <asm/unwind.h>
+#include <asm/stacktrace.h>
 #include <asm/switch_to.h>
 
 /*-------------------------------------------------------------------------
index 6b83e3f..9b9570b 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/ptrace.h>
 #include <linux/kprobes.h>
 #include <linux/kgdb.h>
+#include <asm/entry.h>
 #include <asm/setup.h>
 #include <asm/unaligned.h>
 #include <asm/kprobes.h>
@@ -109,9 +110,7 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
  */
 void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
 {
-       unsigned int param = regs->ecr_param;
-
-       switch (param) {
+       switch (regs->ecr.param) {
        case 1:
                trap_is_brkpt(address, regs);
                break;
index 7654c2e..d5b3ed2 100644 (file)
@@ -115,8 +115,8 @@ static void show_ecr_verbose(struct pt_regs *regs)
        /* For Data fault, this is data address not instruction addr */
        address = current->thread.fault_address;
 
-       vec = regs->ecr_vec;
-       cause_code = regs->ecr_cause;
+       vec = regs->ecr.vec;
+       cause_code = regs->ecr.cause;
 
        /* For DTLB Miss or ProtV, display the memory involved too */
        if (vec == ECR_V_DTLB_MISS) {
@@ -154,7 +154,7 @@ static void show_ecr_verbose(struct pt_regs *regs)
                pr_cont("Misaligned r/w from 0x%08lx\n", address);
 #endif
        } else if (vec == ECR_V_TRAP) {
-               if (regs->ecr_param == 5)
+               if (regs->ecr.param == 5)
                        pr_cont("gcc generated __builtin_trap\n");
        } else {
                pr_cont("Check Programmer's Manual\n");
@@ -184,9 +184,10 @@ void show_regs(struct pt_regs *regs)
        if (user_mode(regs))
                show_faulting_vma(regs->ret); /* faulting code, not data */
 
-       pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\nSTAT: 0x%08lx",
-               regs->event, current->thread.fault_address, regs->ret,
-               regs->status32);
+       pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\n",
+               regs->ecr.full, current->thread.fault_address, regs->ret);
+
+       pr_info("STAT32: 0x%08lx", regs->status32);
 
 #define STS_BIT(r, bit)        r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
 
index d2e09fe..d0a5cec 100644 (file)
 #endif
 
 ENTRY_CFI(memset)
-       PREFETCHW_INSTR r0, 0   ; Prefetch the first write location
        mov.f   0, r2
 ;;; if size is zero
        jz.d    [blink]
        mov     r3, r0          ; don't clobber ret val
 
+       PREFETCHW_INSTR r0, 0   ; Prefetch the first write location
+
 ;;; if length < 8
        brls.d.nt       r2, 8, .Lsmallchunk
        mov.f   lp_count,r2
index 3c16ee9..f7e05c1 100644 (file)
@@ -28,6 +28,10 @@ int slc_enable = 1, ioc_enable = 1;
 unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
 unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
 
+static struct cpuinfo_arc_cache {
+       unsigned int sz_k, line_len, colors;
+} ic_info, dc_info, slc_info;
+
 void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
                               unsigned long sz, const int op, const int full_page);
 
@@ -35,78 +39,24 @@ void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz);
 void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
 void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz);
 
-char *arc_cache_mumbojumbo(int c, char *buf, int len)
-{
-       int n = 0;
-       struct cpuinfo_arc_cache *p;
-
-#define PR_CACHE(p, cfg, str)                                          \
-       if (!(p)->line_len)                                             \
-               n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");     \
-       else                                                            \
-               n += scnprintf(buf + n, len - n,                        \
-                       str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",  \
-                       (p)->sz_k, (p)->assoc, (p)->line_len,           \
-                       (p)->vipt ? "VIPT" : "PIPT",                    \
-                       (p)->alias ? " aliasing" : "",                  \
-                       IS_USED_CFG(cfg));
-
-       PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
-       PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
-
-       p = &cpuinfo_arc700[c].slc;
-       if (p->line_len)
-               n += scnprintf(buf + n, len - n,
-                              "SLC\t\t: %uK, %uB Line%s\n",
-                              p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
-
-       n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
-                      perip_base,
-                      IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
-
-       return buf;
-}
-
-/*
- * Read the Cache Build Confuration Registers, Decode them and save into
- * the cpuinfo structure for later use.
- * No Validation done here, simply read/convert the BCRs
- */
-static void read_decode_cache_bcr_arcv2(int cpu)
+static int read_decode_cache_bcr_arcv2(int c, char *buf, int len)
 {
-       struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
+       struct cpuinfo_arc_cache *p_slc = &slc_info;
+       struct bcr_identity ident;
        struct bcr_generic sbcr;
-
-       struct bcr_slc_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int pad:24, way:2, lsz:2, sz:4;
-#else
-               unsigned int sz:4, lsz:2, way:2, pad:24;
-#endif
-       } slc_cfg;
-
-       struct bcr_clust_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
-#else
-               unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
-#endif
-       } cbcr;
-
-       struct bcr_volatile {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int start:4, limit:4, pad:22, order:1, disable:1;
-#else
-               unsigned int disable:1, order:1, pad:22, limit:4, start:4;
-#endif
-       } vol;
-
+       struct bcr_clust_cfg cbcr;
+       struct bcr_volatile vol;
+       int n = 0;
 
        READ_BCR(ARC_REG_SLC_BCR, sbcr);
        if (sbcr.ver) {
+               struct bcr_slc_cfg  slc_cfg;
                READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
                p_slc->sz_k = 128 << slc_cfg.sz;
                l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+               n += scnprintf(buf + n, len - n,
+                              "SLC\t\t: %uK, %uB Line%s\n",
+                              p_slc->sz_k, p_slc->line_len, IS_USED_RUN(slc_enable));
        }
 
        READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
@@ -129,70 +79,83 @@ static void read_decode_cache_bcr_arcv2(int cpu)
                ioc_enable = 0;
        }
 
+       READ_BCR(AUX_IDENTITY, ident);
+
        /* HS 2.0 didn't have AUX_VOL */
-       if (cpuinfo_arc700[cpu].core.family > 0x51) {
+       if (ident.family > 0x51) {
                READ_BCR(AUX_VOL, vol);
                perip_base = vol.start << 28;
                /* HS 3.0 has limit and strict-ordering fields */
-               if (cpuinfo_arc700[cpu].core.family > 0x52)
+               if (ident.family > 0x52)
                        perip_end = (vol.limit << 28) - 1;
        }
+
+       n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
+                      perip_base,
+                      IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
+
+       return n;
 }
 
-void read_decode_cache_bcr(void)
+int arc_cache_mumbojumbo(int c, char *buf, int len)
 {
-       struct cpuinfo_arc_cache *p_ic, *p_dc;
-       unsigned int cpu = smp_processor_id();
-       struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
-               unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
-       } ibcr, dbcr;
+       struct cpuinfo_arc_cache *p_ic = &ic_info, *p_dc = &dc_info;
+       struct bcr_cache ibcr, dbcr;
+       int vipt, assoc;
+       int n = 0;
 
-       p_ic = &cpuinfo_arc700[cpu].icache;
        READ_BCR(ARC_REG_IC_BCR, ibcr);
-
        if (!ibcr.ver)
                goto dc_chk;
 
-       if (ibcr.ver <= 3) {
+       if (is_isa_arcompact() && (ibcr.ver <= 3)) {
                BUG_ON(ibcr.config != 3);
-               p_ic->assoc = 2;                /* Fixed to 2w set assoc */
-       } else if (ibcr.ver >= 4) {
-               p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */
+               assoc = 2;              /* Fixed to 2w set assoc */
+       } else if (is_isa_arcv2() && (ibcr.ver >= 4)) {
+               assoc = 1 << ibcr.config;       /* 1,2,4,8 */
        }
 
        p_ic->line_len = 8 << ibcr.line_len;
        p_ic->sz_k = 1 << (ibcr.sz - 1);
-       p_ic->vipt = 1;
-       p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
+       p_ic->colors = p_ic->sz_k/assoc/TO_KB(PAGE_SIZE);
+
+       n += scnprintf(buf + n, len - n,
+                       "I-Cache\t\t: %uK, %dway/set, %uB Line, VIPT%s%s\n",
+                       p_ic->sz_k, assoc, p_ic->line_len,
+                       p_ic->colors > 1 ? " aliasing" : "",
+                       IS_USED_CFG(CONFIG_ARC_HAS_ICACHE));
 
 dc_chk:
-       p_dc = &cpuinfo_arc700[cpu].dcache;
        READ_BCR(ARC_REG_DC_BCR, dbcr);
-
        if (!dbcr.ver)
                goto slc_chk;
 
-       if (dbcr.ver <= 3) {
+       if (is_isa_arcompact() && (dbcr.ver <= 3)) {
                BUG_ON(dbcr.config != 2);
-               p_dc->assoc = 4;                /* Fixed to 4w set assoc */
-               p_dc->vipt = 1;
-               p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
-       } else if (dbcr.ver >= 4) {
-               p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */
-               p_dc->vipt = 0;
-               p_dc->alias = 0;                /* PIPT so can't VIPT alias */
+               vipt = 1;
+               assoc = 4;              /* Fixed to 4w set assoc */
+               p_dc->colors = p_dc->sz_k/assoc/TO_KB(PAGE_SIZE);
+       } else if (is_isa_arcv2() && (dbcr.ver >= 4)) {
+               vipt = 0;
+               assoc = 1 << dbcr.config;       /* 1,2,4,8 */
+               p_dc->colors = 1;               /* PIPT so can't VIPT alias */
        }
 
        p_dc->line_len = 16 << dbcr.line_len;
        p_dc->sz_k = 1 << (dbcr.sz - 1);
 
+       n += scnprintf(buf + n, len - n,
+                       "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",
+                       p_dc->sz_k, assoc, p_dc->line_len,
+                       vipt ? "VIPT" : "PIPT",
+                       p_dc->colors > 1 ? " aliasing" : "",
+                       IS_USED_CFG(CONFIG_ARC_HAS_DCACHE));
+
 slc_chk:
        if (is_isa_arcv2())
-                read_decode_cache_bcr_arcv2(cpu);
+               n += read_decode_cache_bcr_arcv2(c, buf + n, len - n);
+
+       return n;
 }
 
 /*
@@ -581,7 +544,7 @@ static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
 
 #endif /* CONFIG_ARC_HAS_ICACHE */
 
-noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
+static noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
        /*
@@ -644,7 +607,7 @@ noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
 #endif
 }
 
-noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
+static __maybe_unused noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
        /*
@@ -1082,7 +1045,7 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
  * 3. All Caches need to be disabled when setting up IOC to elide any in-flight
  *    Coherency transactions
  */
-noinline void __init arc_ioc_setup(void)
+static noinline void __init arc_ioc_setup(void)
 {
        unsigned int ioc_base, mem_sz;
 
@@ -1144,12 +1107,10 @@ noinline void __init arc_ioc_setup(void)
  *    one core suffices for all
  *  - IOC setup / dma callbacks only need to be done once
  */
-void __init arc_cache_init_master(void)
+static noinline void __init arc_cache_init_master(void)
 {
-       unsigned int __maybe_unused cpu = smp_processor_id();
-
        if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
-               struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+               struct cpuinfo_arc_cache *ic = &ic_info;
 
                if (!ic->line_len)
                        panic("cache support enabled but non-existent cache\n");
@@ -1162,14 +1123,14 @@ void __init arc_cache_init_master(void)
                 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
                 * pair to provide vaddr/paddr respectively, just as in MMU v3
                 */
-               if (is_isa_arcv2() && ic->alias)
+               if (is_isa_arcv2() && ic->colors > 1)
                        _cache_line_loop_ic_fn = __cache_line_loop_v3;
                else
                        _cache_line_loop_ic_fn = __cache_line_loop;
        }
 
        if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
-               struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+               struct cpuinfo_arc_cache *dc = &dc_info;
 
                if (!dc->line_len)
                        panic("cache support enabled but non-existent cache\n");
@@ -1181,14 +1142,13 @@ void __init arc_cache_init_master(void)
                /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
                if (is_isa_arcompact()) {
                        int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-                       int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE);
 
-                       if (dc->alias) {
+                       if (dc->colors > 1) {
                                if (!handled)
                                        panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
-                               if (CACHE_COLORS_NUM != num_colors)
+                               if (CACHE_COLORS_NUM != dc->colors)
                                        panic("CACHE_COLORS_NUM not optimized for config\n");
-                       } else if (!dc->alias && handled) {
+                       } else if (handled && dc->colors == 1) {
                                panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
                        }
                }
@@ -1231,9 +1191,6 @@ void __init arc_cache_init_master(void)
 void __ref arc_cache_init(void)
 {
        unsigned int __maybe_unused cpu = smp_processor_id();
-       char str[256];
-
-       pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str)));
 
        if (!cpu)
                arc_cache_init_master();
index 4e14c42..88fa3a4 100644 (file)
@@ -22,14 +22,3 @@ int fixup_exception(struct pt_regs *regs)
 
        return 0;
 }
-
-#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
-unsigned long arc_clear_user_noinline(void __user *to,
-               unsigned long n)
-{
-       return __arc_clear_user(to, n);
-}
-EXPORT_SYMBOL(arc_clear_user_noinline);
-
-#endif
index f59e722..95119a5 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
 #include <linux/mm_types.h>
+#include <asm/entry.h>
 #include <asm/mmu.h>
 
 /*
@@ -99,10 +100,10 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
        if (faulthandler_disabled() || !mm)
                goto no_context;
 
-       if (regs->ecr_cause & ECR_C_PROTV_STORE)        /* ST/EX */
+       if (regs->ecr.cause & ECR_C_PROTV_STORE)        /* ST/EX */
                write = 1;
-       else if ((regs->ecr_vec == ECR_V_PROTV) &&
-                (regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
+       else if ((regs->ecr.vec == ECR_V_PROTV) &&
+                (regs->ecr.cause == ECR_C_PROTV_INST_FETCH))
                exec = 1;
 
        flags = FAULT_FLAG_DEFAULT;
index 9f64d72..6a71b23 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/highmem.h>
 #include <asm/page.h>
 #include <asm/sections.h>
+#include <asm/setup.h>
 #include <asm/arcregs.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
index 6f40f37..e536b2d 100644 (file)
@@ -18,7 +18,9 @@
 /* A copy of the ASID from the PID reg is kept in asid_cache */
 DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
-static int __read_mostly pae_exists;
+static struct cpuinfo_arc_mmu {
+       unsigned int ver, pg_sz_k, s_pg_sz_m, pae, sets, ways;
+} mmuinfo;
 
 /*
  * Utility Routine to erase a J-TLB entry
@@ -131,7 +133,7 @@ static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
 
 noinline void local_flush_tlb_all(void)
 {
-       struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+       struct cpuinfo_arc_mmu *mmu = &mmuinfo;
        unsigned long flags;
        unsigned int entry;
        int num_tlb = mmu->sets * mmu->ways;
@@ -389,7 +391,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 /*
  * Routine to create a TLB entry
  */
-void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
+static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 {
        unsigned long flags;
        unsigned int asid_or_sasid, rwx;
@@ -564,89 +566,64 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
  */
-void read_decode_mmu_bcr(void)
+int arc_mmu_mumbojumbo(int c, char *buf, int len)
 {
-       struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
-       unsigned int tmp;
-       struct bcr_mmu_3 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-       unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
-                    u_itlb:4, u_dtlb:4;
-#else
-       unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
-                    ways:4, ver:8;
-#endif
-       } *mmu3;
-
-       struct bcr_mmu_4 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-       unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
-                    n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
-#else
-       /*           DTLB      ITLB      JES        JE         JA      */
-       unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
-                    pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
-#endif
-       } *mmu4;
+       struct cpuinfo_arc_mmu *mmu = &mmuinfo;
+       unsigned int bcr, u_dtlb, u_itlb, sasid;
+       struct bcr_mmu_3 *mmu3;
+       struct bcr_mmu_4 *mmu4;
+       char super_pg[64] = "";
+       int n = 0;
 
-       tmp = read_aux_reg(ARC_REG_MMU_BCR);
-       mmu->ver = (tmp >> 24);
+       bcr = read_aux_reg(ARC_REG_MMU_BCR);
+       mmu->ver = (bcr >> 24);
 
        if (is_isa_arcompact() && mmu->ver == 3) {
-               mmu3 = (struct bcr_mmu_3 *)&tmp;
+               mmu3 = (struct bcr_mmu_3 *)&bcr;
                mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
                mmu->sets = 1 << mmu3->sets;
                mmu->ways = 1 << mmu3->ways;
-               mmu->u_dtlb = mmu3->u_dtlb;
-               mmu->u_itlb = mmu3->u_itlb;
-               mmu->sasid = mmu3->sasid;
+               u_dtlb = mmu3->u_dtlb;
+               u_itlb = mmu3->u_itlb;
+               sasid = mmu3->sasid;
        } else {
-               mmu4 = (struct bcr_mmu_4 *)&tmp;
+               mmu4 = (struct bcr_mmu_4 *)&bcr;
                mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
                mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
                mmu->sets = 64 << mmu4->n_entry;
                mmu->ways = mmu4->n_ways * 2;
-               mmu->u_dtlb = mmu4->u_dtlb * 4;
-               mmu->u_itlb = mmu4->u_itlb * 4;
-               mmu->sasid = mmu4->sasid;
-               pae_exists = mmu->pae = mmu4->pae;
+               u_dtlb = mmu4->u_dtlb * 4;
+               u_itlb = mmu4->u_itlb * 4;
+               sasid = mmu4->sasid;
+               mmu->pae = mmu4->pae;
        }
-}
 
-char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
-{
-       int n = 0;
-       struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
-       char super_pg[64] = "";
-
-       if (p_mmu->s_pg_sz_m)
-               scnprintf(super_pg, 64, "%dM Super Page %s",
-                         p_mmu->s_pg_sz_m,
-                         IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
+       if (mmu->s_pg_sz_m)
+               scnprintf(super_pg, 64, "/%dM%s",
+                         mmu->s_pg_sz_m,
+                         IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? " (THP enabled)":"");
 
        n += scnprintf(buf + n, len - n,
-                     "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
-                      p_mmu->ver, p_mmu->pg_sz_k, super_pg,  CONFIG_PGTABLE_LEVELS,
-                      p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
-                      p_mmu->u_dtlb, p_mmu->u_itlb,
-                      IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
-
-       return buf;
+                     "MMU [v%x]\t: %dk%s, swalk %d lvl, JTLB %dx%d, uDTLB %d, uITLB %d%s%s%s\n",
+                      mmu->ver, mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
+                      mmu->sets, mmu->ways,
+                      u_dtlb, u_itlb,
+                      IS_AVAIL1(sasid, ", SASID"),
+                      IS_AVAIL2(mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
+
+       return n;
 }
 
 int pae40_exist_but_not_enab(void)
 {
-       return pae_exists && !is_pae40_enabled();
+       return mmuinfo.pae && !is_pae40_enabled();
 }
 
 void arc_mmu_init(void)
 {
-       struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
-       char str[256];
+       struct cpuinfo_arc_mmu *mmu = &mmuinfo;
        int compat = 0;
 
-       pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
-
        /*
         * Can't be done in processor.h due to header include dependencies
         */
@@ -723,7 +700,7 @@ volatile int dup_pd_silent; /* Be silent abt it or complain (default) */
 void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
                          struct pt_regs *regs)
 {
-       struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+       struct cpuinfo_arc_mmu *mmu = &mmuinfo;
        unsigned long flags;
        int set, n_ways = mmu->ways;
 
index b821df7..1feb990 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 #include <linux/libfdt.h>
 
 #include <asm/asm-offsets.h>
index db96dcb..8803a0f 100644 (file)
@@ -1 +1,2 @@
+# Help: DRAM base at 0x00000000
 CONFIG_DRAM_BASE=0x00000000
index 343d533..aab8f86 100644 (file)
@@ -1 +1,2 @@
+# Help: DRAM base at 0xc0000000
 CONFIG_DRAM_BASE=0xc0000000
index 61ba704..4aabce4 100644 (file)
@@ -1 +1,2 @@
+# Help: DRAM base at 0xd0000000
 CONFIG_DRAM_BASE=0xd0000000
index a6d6f7a..1ab94da 100644 (file)
@@ -1,2 +1,3 @@
+# Help: Enable Large Physical Address Extension mode
 CONFIG_ARM_LPAE=y
 CONFIG_VMSPLIT_2G=y
index f3cd04f..72529f5 100644 (file)
@@ -227,6 +227,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
        return false;
 }
 
+static inline void kvm_vcpu_pmu_resync_el0(void) {}
+
 /* PMU Version in DFR Register */
 #define ARMV8_PMU_DFR_VER_NI        0
 #define ARMV8_PMU_DFR_VER_V3P4      0x5
diff --git a/arch/arm/include/asm/ide.h b/arch/arm/include/asm/ide.h
deleted file mode 100644 (file)
index a81e0b0..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  arch/arm/include/asm/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the ARM architecture specific IDE code.
- */
-
-#ifndef __ASMARM_IDE_H
-#define __ASMARM_IDE_H
-
-#ifdef __KERNEL__
-
-#define __ide_mm_insw(port,addr,len)   readsw(port,addr,len)
-#define __ide_mm_insl(port,addr,len)   readsl(port,addr,len)
-#define __ide_mm_outsw(port,addr,len)  writesw(port,addr,len)
-#define __ide_mm_outsl(port,addr,len)  writesl(port,addr,len)
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMARM_IDE_H */
index 6865d54..c47c36f 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Virtualization guest
 #
 # Base options for platforms
 #
index 58e5eb2..5882b24 100644 (file)
 #define HCR_DCT                (UL(1) << 57)
 #define HCR_ATA_SHIFT  56
 #define HCR_ATA                (UL(1) << HCR_ATA_SHIFT)
+#define HCR_TTLBOS     (UL(1) << 55)
+#define HCR_TTLBIS     (UL(1) << 54)
+#define HCR_ENSCXT     (UL(1) << 53)
+#define HCR_TOCU       (UL(1) << 52)
 #define HCR_AMVOFFEN   (UL(1) << 51)
+#define HCR_TICAB      (UL(1) << 50)
 #define HCR_TID4       (UL(1) << 49)
 #define HCR_FIEN       (UL(1) << 47)
 #define HCR_FWB                (UL(1) << 46)
+#define HCR_NV2                (UL(1) << 45)
+#define HCR_AT         (UL(1) << 44)
+#define HCR_NV1                (UL(1) << 43)
+#define HCR_NV         (UL(1) << 42)
 #define HCR_API                (UL(1) << 41)
 #define HCR_APK                (UL(1) << 40)
 #define HCR_TEA                (UL(1) << 37)
@@ -89,7 +98,6 @@
                         HCR_BSU_IS | HCR_FB | HCR_TACR | \
                         HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
                         HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
-#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
 #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
                                 BIT(18) |              \
                                 GENMASK(16, 15))
 
+/*
+ * FGT register definitions
+ *
+ * RES0 and polarity masks as of DDI0487J.a, to be updated as needed.
+ * We're not using the generated masks as they are usually ahead of
+ * the published ARM ARM, which we use as a reference.
+ *
+ * Once we get to a point where the two describe the same thing, we'll
+ * merge the definitions. One day.
+ */
+#define __HFGRTR_EL2_RES0      (GENMASK(63, 56) | GENMASK(53, 51))
+#define __HFGRTR_EL2_MASK      GENMASK(49, 0)
+#define __HFGRTR_EL2_nMASK     (GENMASK(55, 54) | BIT(50))
+
+#define __HFGWTR_EL2_RES0      (GENMASK(63, 56) | GENMASK(53, 51) |    \
+                                BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
+                                GENMASK(26, 25) | BIT(21) | BIT(18) |  \
+                                GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
+#define __HFGWTR_EL2_MASK      GENMASK(49, 0)
+#define __HFGWTR_EL2_nMASK     (GENMASK(55, 54) | BIT(50))
+
+#define __HFGITR_EL2_RES0      GENMASK(63, 57)
+#define __HFGITR_EL2_MASK      GENMASK(54, 0)
+#define __HFGITR_EL2_nMASK     GENMASK(56, 55)
+
+#define __HDFGRTR_EL2_RES0     (BIT(49) | BIT(42) | GENMASK(39, 38) |  \
+                                GENMASK(21, 20) | BIT(8))
+#define __HDFGRTR_EL2_MASK     ~__HDFGRTR_EL2_nMASK
+#define __HDFGRTR_EL2_nMASK    GENMASK(62, 59)
+
+#define __HDFGWTR_EL2_RES0     (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
+                                BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
+                                BIT(22) | BIT(9) | BIT(6))
+#define __HDFGWTR_EL2_MASK     ~__HDFGWTR_EL2_nMASK
+#define __HDFGWTR_EL2_nMASK    GENMASK(62, 60)
+
+/* Similar definitions for HCRX_EL2 */
+#define __HCRX_EL2_RES0                (GENMASK(63, 16) | GENMASK(13, 12))
+#define __HCRX_EL2_MASK                (0)
+#define __HCRX_EL2_nMASK       (GENMASK(15, 14) | GENMASK(4, 0))
+
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK     (~UL(0xf))
 /*
index 24e28bb..24b5e6b 100644 (file)
@@ -70,6 +70,7 @@ enum __kvm_host_smccc_func {
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+       __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
        __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
        __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
        __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
@@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
                                         phys_addr_t ipa,
                                         int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                                       phys_addr_t start, unsigned long pages);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
index d3dd05b..af06ccb 100644 (file)
@@ -49,6 +49,7 @@
 #define KVM_REQ_RELOAD_GICv4   KVM_ARCH_REQ(4)
 #define KVM_REQ_RELOAD_PMU     KVM_ARCH_REQ(5)
 #define KVM_REQ_SUSPEND                KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
                                     KVM_DIRTY_LOG_INITIALLY_SET)
@@ -380,6 +381,7 @@ enum vcpu_sysreg {
        CPTR_EL2,       /* Architectural Feature Trap Register (EL2) */
        HSTR_EL2,       /* Hypervisor System Trap Register */
        HACR_EL2,       /* Hypervisor Auxiliary Control Register */
+       HCRX_EL2,       /* Extended Hypervisor Configuration Register */
        TTBR0_EL2,      /* Translation Table Base Register 0 (EL2) */
        TTBR1_EL2,      /* Translation Table Base Register 1 (EL2) */
        TCR_EL2,        /* Translation Control Register (EL2) */
@@ -400,6 +402,11 @@ enum vcpu_sysreg {
        TPIDR_EL2,      /* EL2 Software Thread ID Register */
        CNTHCTL_EL2,    /* Counter-timer Hypervisor Control register */
        SP_EL2,         /* EL2 Stack Pointer */
+       HFGRTR_EL2,
+       HFGWTR_EL2,
+       HFGITR_EL2,
+       HDFGRTR_EL2,
+       HDFGWTR_EL2,
        CNTHP_CTL_EL2,
        CNTHP_CVAL_EL2,
        CNTHV_CTL_EL2,
@@ -567,8 +574,7 @@ struct kvm_vcpu_arch {
        /* Cache some mmu pages needed inside spinlock regions */
        struct kvm_mmu_memory_cache mmu_page_cache;
 
-       /* Target CPU and feature flags */
-       int target;
+       /* feature flags */
        DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
 
        /* Virtual SError ESR to restore when HCR_EL2.VSE is set */
@@ -669,6 +675,8 @@ struct kvm_vcpu_arch {
 #define VCPU_SVE_FINALIZED     __vcpu_single_flag(cflags, BIT(1))
 /* PTRAUTH exposed to guest */
 #define GUEST_HAS_PTRAUTH      __vcpu_single_flag(cflags, BIT(2))
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED       __vcpu_single_flag(cflags, BIT(3))
 
 /* Exception pending */
 #define PENDING_EXCEPTION      __vcpu_single_flag(iflags, BIT(0))
@@ -899,7 +907,6 @@ struct kvm_vcpu_stat {
        u64 exits;
 };
 
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
@@ -967,8 +974,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
 #define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
 #endif /* __KVM_NVHE_HYPERVISOR__ */
 
-void force_vm_exit(const cpumask_t *mask);
-
 int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
 void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
 
@@ -983,6 +988,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
 void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
 
 int __init kvm_sys_reg_table_init(void);
+int __init populate_nv_trap_config(void);
 
 bool lock_all_vcpus(struct kvm *kvm);
 void unlock_all_vcpus(struct kvm *kvm);
@@ -1049,8 +1055,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
        return cpus_have_const_cap(ARM64_SPECTRE_V3A);
 }
 
-void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
-
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 
@@ -1113,13 +1117,15 @@ int __init kvm_set_ipa_limit(void);
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 static inline bool kvm_vm_is_protected(struct kvm *kvm)
 {
        return false;
 }
 
-void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
-
 int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
 bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 
index 0e1e1ab..96a80e8 100644 (file)
@@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
                           void __iomem **haddr);
 int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
                             void **haddr);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
 void __init free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
index 8fb67f0..fa23cc9 100644 (file)
@@ -11,6 +11,8 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
                test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
 }
 
+extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
+
 struct sys_reg_params;
 struct sys_reg_desc;
 
index 929d355..d3e354b 100644 (file)
@@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
  *        kvm_pgtable_prot format.
  */
 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu:       Stage-2 KVM MMU struct
+ * @addr:      The base Intermediate physical address from which to invalidate
+ * @size:      Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t addr, size_t size);
 #endif /* __ARM64_KVM_PGTABLE_H__ */
index 16464bf..3829657 100644 (file)
 #define SYS_DC_CIGSW                   sys_insn(1, 0, 7, 14, 4)
 #define SYS_DC_CIGDSW                  sys_insn(1, 0, 7, 14, 6)
 
+#define SYS_IC_IALLUIS                 sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU                   sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU                    sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC                    sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC                   sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC                  sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC                    sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC                   sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC                  sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU                    sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP                    sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP                   sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP                  sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP                   sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP                  sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP                 sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC                   sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC                  sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC                 sys_insn(1, 3, 7, 14, 5)
+
+/* Data cache zero operations */
+#define SYS_DC_ZVA                     sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA                     sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA                    sys_insn(1, 3, 7, 4, 4)
+
 /*
  * Automatically generated definitions for system registers, the
  * manual encodings below are in the process of being converted to
 #define SYS_DBGDTRTX_EL0               sys_reg(2, 3, 0, 5, 0)
 #define SYS_DBGVCR32_EL2               sys_reg(2, 4, 0, 7, 0)
 
+#define SYS_BRBINF_EL1(n)              sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBINFINJ_EL1              sys_reg(2, 1, 9, 1, 0)
+#define SYS_BRBSRC_EL1(n)              sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBSRCINJ_EL1              sys_reg(2, 1, 9, 1, 1)
+#define SYS_BRBTGT_EL1(n)              sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+#define SYS_BRBTGTINJ_EL1              sys_reg(2, 1, 9, 1, 2)
+#define SYS_BRBTS_EL1                  sys_reg(2, 1, 9, 0, 2)
+
+#define SYS_BRBCR_EL1                  sys_reg(2, 1, 9, 0, 0)
+#define SYS_BRBFCR_EL1                 sys_reg(2, 1, 9, 0, 1)
+#define SYS_BRBIDR0_EL1                        sys_reg(2, 1, 9, 2, 0)
+
+#define SYS_TRCITECR_EL1               sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m)                        sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m)                 sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS              sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR                 sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR                  sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR                  sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0               sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1               sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m)               sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR                        sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET                        sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m)              sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m)             sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m)                        sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR                 sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH                 sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID                   sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R              sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R              sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m)            sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0                    sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10                   sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11                   sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12                   sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13                   sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1                    sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2                    sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3                    sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4                    sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5                    sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6                    sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7                    sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8                    sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9                    sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m)               sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR                 sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR                   sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR                 sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR                   sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m)               sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR                     sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m)               sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR               sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR                  sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m)                        sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m)                        sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m)              sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR               sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR                   sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR                  sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR                        sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR                  sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR                  sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR                        sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR              sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR                        sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0              sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1              sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m)              sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR                   sys_reg(2, 1, 1, 0, 4)
+
 #define SYS_MIDR_EL1                   sys_reg(3, 0, 0, 0, 0)
 #define SYS_MPIDR_EL1                  sys_reg(3, 0, 0, 0, 5)
 #define SYS_REVIDR_EL1                 sys_reg(3, 0, 0, 0, 6)
 #define SYS_ERXCTLR_EL1                        sys_reg(3, 0, 5, 4, 1)
 #define SYS_ERXSTATUS_EL1              sys_reg(3, 0, 5, 4, 2)
 #define SYS_ERXADDR_EL1                        sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1                        sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1              sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1              sys_reg(3, 0, 5, 4, 6)
 #define SYS_ERXMISC0_EL1               sys_reg(3, 0, 5, 5, 0)
 #define SYS_ERXMISC1_EL1               sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1               sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1               sys_reg(3, 0, 5, 5, 3)
 #define SYS_TFSR_EL1                   sys_reg(3, 0, 5, 6, 0)
 #define SYS_TFSRE0_EL1                 sys_reg(3, 0, 5, 6, 1)
 
 #define SYS_ICC_IGRPEN0_EL1            sys_reg(3, 0, 12, 12, 6)
 #define SYS_ICC_IGRPEN1_EL1            sys_reg(3, 0, 12, 12, 7)
 
+#define SYS_ACCDATA_EL1                        sys_reg(3, 0, 13, 0, 5)
+
 #define SYS_CNTKCTL_EL1                        sys_reg(3, 0, 14, 1, 0)
 
 #define SYS_AIDR_EL1                   sys_reg(3, 1, 0, 0, 7)
 #define SYS_VTCR_EL2                   sys_reg(3, 4, 2, 1, 2)
 
 #define SYS_TRFCR_EL2                  sys_reg(3, 4, 1, 2, 1)
-#define SYS_HDFGRTR_EL2                        sys_reg(3, 4, 3, 1, 4)
-#define SYS_HDFGWTR_EL2                        sys_reg(3, 4, 3, 1, 5)
 #define SYS_HAFGRTR_EL2                        sys_reg(3, 4, 3, 1, 6)
 #define SYS_SPSR_EL2                   sys_reg(3, 4, 4, 0, 0)
 #define SYS_ELR_EL2                    sys_reg(3, 4, 4, 0, 1)
 
 #define SYS_SP_EL2                     sys_reg(3, 6,  4, 1, 0)
 
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R    sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W    sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R    sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W    sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP   sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP   sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E2R    sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W    sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R   sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W   sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R   sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W   sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+
+/* TLBI instructions */
+#define OP_TLBI_VMALLE1OS              sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS                 sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS               sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS                        sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS                        sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS               sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS                        sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS               sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS               sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS              sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS              sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS                 sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS               sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS                        sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS                        sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS               sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS                        sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS               sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS               sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS              sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1                  sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1                 sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1                 sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1                        sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1                        sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1                   sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1                 sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1                  sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1                  sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1                 sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS           sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS              sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS            sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS             sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS             sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS            sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS             sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS            sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS            sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS           sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS           sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS              sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS            sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS             sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS             sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS            sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS             sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS            sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS            sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS           sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS               sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS              sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS              sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS             sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS             sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS                        sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS              sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS               sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS               sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS              sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS              sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS             sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS             sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS            sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS                        sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS                 sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS                        sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS                        sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS           sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS                        sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS               sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS                        sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS                 sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS                        sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS                        sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS           sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS              sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1                        sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1               sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS             sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS             sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1               sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1              sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS            sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS                        sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS               sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2                  sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2                 sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2                  sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2                   sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1                  sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2                  sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1             sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS           sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS          sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS          sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS         sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS             sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS              sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS             sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS             sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS                sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS             sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS            sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS             sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS              sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS             sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS             sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS                sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS           sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS             sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS            sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS          sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS          sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS            sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS           sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS         sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS             sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS            sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS               sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS              sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS               sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS                        sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS               sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS               sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS          sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_BRB_IALL                    sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ                     sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX                    sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX                    sys_insn(1, 3, 7, 3, 5)
+#define OP_CPP_RCTX                    sys_insn(1, 3, 7, 3, 7)
+
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_ENTP2        (BIT(60))
 #define SCTLR_ELx_DSSBS        (BIT(44))
index 55b50e1..b149cf9 100644 (file)
@@ -335,14 +335,77 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
  */
 #define MAX_TLBI_OPS   PTRS_PER_PTE
 
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op:        TLBI instruction that operates on a range (has 'r' prefix)
+ * @start:     The start address of the range
+ * @pages:     Range as the number of pages from 'start'
+ * @stride:    Flush granularity
+ * @asid:      The ASID of the task (0 for IPA instructions)
+ * @tlb_level: Translation Table level hint, if known
+ * @tlbi_user: If 'true', call an additional __tlbi_user()
+ *              (typically for user ASIDs). 'false' for IPA instructions
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ *    operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ *    by 'scale', so multiple range TLBI operations may be required.
+ *    Start from scale = 0, flush the corresponding number of pages
+ *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ *    until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride,                 \
+                               asid, tlb_level, tlbi_user)             \
+do {                                                                   \
+       int num = 0;                                                    \
+       int scale = 0;                                                  \
+       unsigned long addr;                                             \
+                                                                       \
+       while (pages > 0) {                                             \
+               if (!system_supports_tlb_range() ||                     \
+                   pages % 2 == 1) {                                   \
+                       addr = __TLBI_VADDR(start, asid);               \
+                       __tlbi_level(op, addr, tlb_level);              \
+                       if (tlbi_user)                                  \
+                               __tlbi_user_level(op, addr, tlb_level); \
+                       start += stride;                                \
+                       pages -= stride >> PAGE_SHIFT;                  \
+                       continue;                                       \
+               }                                                       \
+                                                                       \
+               num = __TLBI_RANGE_NUM(pages, scale);                   \
+               if (num >= 0) {                                         \
+                       addr = __TLBI_VADDR_RANGE(start, asid, scale,   \
+                                                 num, tlb_level);      \
+                       __tlbi(r##op, addr);                            \
+                       if (tlbi_user)                                  \
+                               __tlbi_user(r##op, addr);               \
+                       start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+                       pages -= __TLBI_RANGE_PAGES(num, scale);        \
+               }                                                       \
+               scale++;                                                \
+       }                                                               \
+} while (0)
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
+       __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
                                     unsigned long start, unsigned long end,
                                     unsigned long stride, bool last_level,
                                     int tlb_level)
 {
-       int num = 0;
-       int scale = 0;
-       unsigned long asid, addr, pages;
+       unsigned long asid, pages;
 
        start = round_down(start, stride);
        end = round_up(end, stride);
@@ -364,56 +427,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
        dsb(ishst);
        asid = ASID(vma->vm_mm);
 
-       /*
-        * When the CPU does not support TLB range operations, flush the TLB
-        * entries one by one at the granularity of 'stride'. If the TLB
-        * range ops are supported, then:
-        *
-        * 1. If 'pages' is odd, flush the first page through non-range
-        *    operations;
-        *
-        * 2. For remaining pages: the minimum range granularity is decided
-        *    by 'scale', so multiple range TLBI operations may be required.
-        *    Start from scale = 0, flush the corresponding number of pages
-        *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
-        *    until no pages left.
-        *
-        * Note that certain ranges can be represented by either num = 31 and
-        * scale or num = 0 and scale + 1. The loop below favours the latter
-        * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
-        */
-       while (pages > 0) {
-               if (!system_supports_tlb_range() ||
-                   pages % 2 == 1) {
-                       addr = __TLBI_VADDR(start, asid);
-                       if (last_level) {
-                               __tlbi_level(vale1is, addr, tlb_level);
-                               __tlbi_user_level(vale1is, addr, tlb_level);
-                       } else {
-                               __tlbi_level(vae1is, addr, tlb_level);
-                               __tlbi_user_level(vae1is, addr, tlb_level);
-                       }
-                       start += stride;
-                       pages -= stride >> PAGE_SHIFT;
-                       continue;
-               }
-
-               num = __TLBI_RANGE_NUM(pages, scale);
-               if (num >= 0) {
-                       addr = __TLBI_VADDR_RANGE(start, asid, scale,
-                                                 num, tlb_level);
-                       if (last_level) {
-                               __tlbi(rvale1is, addr);
-                               __tlbi_user(rvale1is, addr);
-                       } else {
-                               __tlbi(rvae1is, addr);
-                               __tlbi_user(rvae1is, addr);
-                       }
-                       start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
-                       pages -= __TLBI_RANGE_PAGES(num, scale);
-               }
-               scale++;
-       }
+       if (last_level)
+               __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+       else
+               __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+
        dsb(ish);
        mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
 }
index a5f533f..b018ae1 100644 (file)
@@ -2627,6 +2627,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP)
        },
+       {
+               .desc = "Fine Grained Traps",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_HAS_FGT,
+               .matches = has_cpuid_feature,
+               ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
+       },
 #ifdef CONFIG_ARM64_SME
        {
                .desc = "Scalable Matrix Extension",
index aee12c7..3addc09 100644 (file)
@@ -262,9 +262,9 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
                if (!len)
                        return;
 
-               len = strscpy(buf, cmdline, ARRAY_SIZE(buf));
-               if (len == -E2BIG)
-                       len = ARRAY_SIZE(buf) - 1;
+               len = min(len, ARRAY_SIZE(buf) - 1);
+               memcpy(buf, cmdline, len);
+               buf[len] = '\0';
 
                if (strcmp(buf, "--") == 0)
                        return;
index f531da6..83c1e09 100644 (file)
@@ -25,7 +25,6 @@ menuconfig KVM
        select MMU_NOTIFIER
        select PREEMPT_NOTIFIERS
        select HAVE_KVM_CPU_RELAX_INTERCEPT
-       select HAVE_KVM_ARCH_TLB_FLUSH_ALL
        select KVM_MMIO
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select KVM_XFER_TO_GUEST_WORK
@@ -43,6 +42,7 @@ menuconfig KVM
        select SCHED_INFO
        select GUEST_PERF_EVENTS if PERF_EVENTS
        select INTERVAL_TREE
+       select XARRAY_MULTI
        help
          Support hosting virtualized guest machines.
 
index d1cb298..4866b3f 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
 #include <asm/kvm_pkvm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/sections.h>
@@ -365,7 +366,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 #endif
 
        /* Force users to call KVM_ARM_VCPU_INIT */
-       vcpu->arch.target = -1;
+       vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
        bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
        vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -462,7 +463,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                vcpu_ptrauth_disable(vcpu);
        kvm_arch_vcpu_load_debug_state_flags(vcpu);
 
-       if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
+       if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
                vcpu_set_on_unsupported_cpu(vcpu);
 }
 
@@ -574,7 +575,7 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
 
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
 {
-       return vcpu->arch.target >= 0;
+       return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
 }
 
 /*
@@ -803,6 +804,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
                        kvm_pmu_handle_pmcr(vcpu,
                                            __vcpu_sys_reg(vcpu, PMCR_EL0));
 
+               if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
+                       kvm_vcpu_pmu_restore_guest(vcpu);
+
                if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
                        return kvm_vcpu_suspend(vcpu);
 
@@ -818,6 +822,9 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
        if (likely(!vcpu_mode_is_32bit(vcpu)))
                return false;
 
+       if (vcpu_has_nv(vcpu))
+               return true;
+
        return !kvm_supports_32bit_el0();
 }
 
@@ -1058,7 +1065,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                         * invalid. The VMM can try and fix it by issuing  a
                         * KVM_ARM_VCPU_INIT if it really wants to.
                         */
-                       vcpu->arch.target = -1;
+                       vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
                        ret = ARM_EXCEPTION_IL;
                }
 
@@ -1219,8 +1226,7 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
 {
        unsigned long features = init->features[0];
 
-       return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) ||
-                       vcpu->arch.target != init->target;
+       return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
 }
 
 static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
@@ -1236,20 +1242,18 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
            !bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
                goto out_unlock;
 
-       vcpu->arch.target = init->target;
        bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
 
        /* Now we know what it is, we can reset it. */
        ret = kvm_reset_vcpu(vcpu);
        if (ret) {
-               vcpu->arch.target = -1;
                bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
                goto out_unlock;
        }
 
        bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
        set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
-
+       vcpu_set_flag(vcpu, VCPU_INITIALIZED);
 out_unlock:
        mutex_unlock(&kvm->arch.config_lock);
        return ret;
@@ -1260,14 +1264,15 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 {
        int ret;
 
-       if (init->target != kvm_target_cpu())
+       if (init->target != KVM_ARM_TARGET_GENERIC_V8 &&
+           init->target != kvm_target_cpu())
                return -EINVAL;
 
        ret = kvm_vcpu_init_check_features(vcpu, init);
        if (ret)
                return ret;
 
-       if (vcpu->arch.target == -1)
+       if (!kvm_vcpu_initialized(vcpu))
                return __kvm_vcpu_set_target(vcpu, init);
 
        if (kvm_vcpu_init_changed(vcpu, init))
@@ -1532,12 +1537,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
                                        struct kvm_arm_device_addr *dev_addr)
 {
@@ -1595,9 +1594,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
                return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
        }
        case KVM_ARM_PREFERRED_TARGET: {
-               struct kvm_vcpu_init init;
-
-               kvm_vcpu_preferred_target(&init);
+               struct kvm_vcpu_init init = {
+                       .target = KVM_ARM_TARGET_GENERIC_V8,
+               };
 
                if (copy_to_user(argp, &init, sizeof(init)))
                        return -EFAULT;
@@ -2276,30 +2275,8 @@ static int __init init_hyp_mode(void)
        for_each_possible_cpu(cpu) {
                struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
                char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
-               unsigned long hyp_addr;
 
-               /*
-                * Allocate a contiguous HYP private VA range for the stack
-                * and guard page. The allocation is also aligned based on
-                * the order of its size.
-                */
-               err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
-               if (err) {
-                       kvm_err("Cannot allocate hyp stack guard page\n");
-                       goto out_err;
-               }
-
-               /*
-                * Since the stack grows downwards, map the stack to the page
-                * at the higher address and leave the lower guard page
-                * unbacked.
-                *
-                * Any valid stack address now has the PAGE_SHIFT bit as 1
-                * and addresses corresponding to the guard page have the
-                * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-                */
-               err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
-                                           __pa(stack_page), PAGE_HYP);
+               err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
                if (err) {
                        kvm_err("Cannot map hyp stack\n");
                        goto out_err;
@@ -2312,8 +2289,6 @@ static int __init init_hyp_mode(void)
                 * has been mapped in the flexible private VA space.
                 */
                params->stack_pa = __pa(stack_page);
-
-               params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
        }
 
        for_each_possible_cpu(cpu) {
index b966620..9ced1bf 100644 (file)
 
 #include "trace.h"
 
+enum trap_behaviour {  /* Bit flags: one bit per access direction */
+       BEHAVE_HANDLE_LOCALLY   = 0,            /* Neither direction bit set */
+       BEHAVE_FORWARD_READ     = BIT(0),       /* Read direction */
+       BEHAVE_FORWARD_WRITE    = BIT(1),       /* Write direction */
+       BEHAVE_FORWARD_ANY      = BEHAVE_FORWARD_READ | BEHAVE_FORWARD_WRITE, /* Both directions */
+};
+
+struct trap_bits {     /* Describes one coarse trap control (element type of coarse_trap_bits[]) */
+       const enum vcpu_sysreg          index;          /* Register holding the control (HCR_EL2 or MDCR_EL2 in coarse_trap_bits[]) */
+       const enum trap_behaviour       behaviour;      /* Access direction(s) associated with this control */
+       const u64                       value;          /* Expected bits; presumably compared as (reg & mask) == value - TODO confirm in the lookup code */
+       const u64                       mask;           /* Register bits that take part in the comparison */
+};
+
+/* Coarse Grained Trap definitions */
+enum cgt_group_id {
+       /* Indicates no coarse trap control */
+       __RESERVED__,   /* First enumerator (0); MCB() also appends it to end each list */
+
+       /*
+        * The first batch of IDs denote coarse trapping that are used
+        * on their own instead of being part of a combination of
+        * trap controls.
+        */
+       CGT_HCR_TID1,
+       CGT_HCR_TID2,
+       CGT_HCR_TID3,
+       CGT_HCR_IMO,
+       CGT_HCR_FMO,
+       CGT_HCR_TIDCP,
+       CGT_HCR_TACR,
+       CGT_HCR_TSW,
+       CGT_HCR_TPC,
+       CGT_HCR_TPU,
+       CGT_HCR_TTLB,
+       CGT_HCR_TVM,
+       CGT_HCR_TDZ,
+       CGT_HCR_TRVM,
+       CGT_HCR_TLOR,
+       CGT_HCR_TERR,
+       CGT_HCR_APK,
+       CGT_HCR_NV,
+       CGT_HCR_NV_nNV2,
+       CGT_HCR_NV1_nNV2,
+       CGT_HCR_AT,
+       CGT_HCR_nFIEN,
+       CGT_HCR_TID4,
+       CGT_HCR_TICAB,
+       CGT_HCR_TOCU,
+       CGT_HCR_ENSCXT,
+       CGT_HCR_TTLBIS,
+       CGT_HCR_TTLBOS,
+
+       CGT_MDCR_TPMCR,
+       CGT_MDCR_TPM,
+       CGT_MDCR_TDE,
+       CGT_MDCR_TDA,
+       CGT_MDCR_TDOSA,
+       CGT_MDCR_TDRA,
+       CGT_MDCR_E2PB,
+       CGT_MDCR_TPMS,
+       CGT_MDCR_TTRF,
+       CGT_MDCR_E2TB,
+       CGT_MDCR_TDCC,
+
+       /*
+        * Anything after this point is a combination of coarse trap
+        * controls, which must all be evaluated to decide what to do.
+        */
+       __MULTIPLE_CONTROL_BITS__,      /* Marker: MCB() subtracts it to index coarse_control_combo[] */
+       CGT_HCR_IMO_FMO = __MULTIPLE_CONTROL_BITS__,    /* Same value as the marker, i.e. combo index 0 */
+       CGT_HCR_TID2_TID4,
+       CGT_HCR_TTLB_TTLBIS,
+       CGT_HCR_TTLB_TTLBOS,
+       CGT_HCR_TVM_TRVM,
+       CGT_HCR_TPU_TICAB,
+       CGT_HCR_TPU_TOCU,
+       CGT_HCR_NV1_nNV2_ENSCXT,
+       CGT_MDCR_TPM_TPMCR,
+       CGT_MDCR_TDE_TDA,
+       CGT_MDCR_TDE_TDOSA,
+       CGT_MDCR_TDE_TDRA,
+       CGT_MDCR_TDCC_TDE_TDA,
+
+       /*
+        * Anything after this point requires a callback evaluating a
+        * complex trap condition. Ugly stuff.
+        */
+       __COMPLEX_CONDITIONS__,         /* Marker: CCC() subtracts it to index ccc[] */
+       CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__,  /* Same value as the marker, i.e. ccc[] index 0 */
+       CGT_CNTHCTL_EL1PTEN,
+
+       /* Must be last */
+       __NR_CGT_GROUP_IDS__
+};
+
+static const struct trap_bits coarse_trap_bits[] = {   /* Indexed by enum cgt_group_id; only the stand-alone control IDs have entries */
+       [CGT_HCR_TID1] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID1,
+               .mask           = HCR_TID1,
+               .behaviour      = BEHAVE_FORWARD_READ,
+       },
+       [CGT_HCR_TID2] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID2,
+               .mask           = HCR_TID2,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TID3] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID3,
+               .mask           = HCR_TID3,
+               .behaviour      = BEHAVE_FORWARD_READ,
+       },
+       [CGT_HCR_IMO] = {
+               .index          = HCR_EL2,
+               .value          = HCR_IMO,
+               .mask           = HCR_IMO,
+               .behaviour      = BEHAVE_FORWARD_WRITE,
+       },
+       [CGT_HCR_FMO] = {
+               .index          = HCR_EL2,
+               .value          = HCR_FMO,
+               .mask           = HCR_FMO,
+               .behaviour      = BEHAVE_FORWARD_WRITE,
+       },
+       [CGT_HCR_TIDCP] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TIDCP,
+               .mask           = HCR_TIDCP,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TACR] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TACR,
+               .mask           = HCR_TACR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TSW] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TSW,
+               .mask           = HCR_TSW,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TPC] = { /* Also called TCPC when FEAT_DPB is implemented */
+               .index          = HCR_EL2,
+               .value          = HCR_TPC,
+               .mask           = HCR_TPC,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TPU] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TPU,
+               .mask           = HCR_TPU,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TTLB] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TTLB,
+               .mask           = HCR_TTLB,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TVM] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TVM,
+               .mask           = HCR_TVM,
+               .behaviour      = BEHAVE_FORWARD_WRITE,
+       },
+       [CGT_HCR_TDZ] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TDZ,
+               .mask           = HCR_TDZ,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TRVM] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TRVM,
+               .mask           = HCR_TRVM,
+               .behaviour      = BEHAVE_FORWARD_READ,
+       },
+       [CGT_HCR_TLOR] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TLOR,
+               .mask           = HCR_TLOR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TERR] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TERR,
+               .mask           = HCR_TERR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_APK] = {
+               .index          = HCR_EL2,
+               .value          = 0,            /* NOTE(review): zero under a set mask - presumably "bit clear" polarity; confirm */
+               .mask           = HCR_APK,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_NV] = {
+               .index          = HCR_EL2,
+               .value          = HCR_NV,
+               .mask           = HCR_NV,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_NV_nNV2] = {
+               .index          = HCR_EL2,
+               .value          = HCR_NV,
+               .mask           = HCR_NV | HCR_NV2,     /* HCR_NV2 is in the mask but not in the value */
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_NV1_nNV2] = {
+               .index          = HCR_EL2,
+               .value          = HCR_NV | HCR_NV1,
+               .mask           = HCR_NV | HCR_NV1 | HCR_NV2,   /* HCR_NV2 is in the mask but not in the value */
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_AT] = {
+               .index          = HCR_EL2,
+               .value          = HCR_AT,
+               .mask           = HCR_AT,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_nFIEN] = {
+               .index          = HCR_EL2,
+               .value          = 0,            /* NOTE(review): zero under a set mask - presumably "bit clear" polarity; confirm */
+               .mask           = HCR_FIEN,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TID4] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID4,
+               .mask           = HCR_TID4,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TICAB] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TICAB,
+               .mask           = HCR_TICAB,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TOCU] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TOCU,
+               .mask           = HCR_TOCU,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_ENSCXT] = {
+               .index          = HCR_EL2,
+               .value          = 0,            /* NOTE(review): zero under a set mask - presumably "bit clear" polarity; confirm */
+               .mask           = HCR_ENSCXT,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TTLBIS] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TTLBIS,
+               .mask           = HCR_TTLBIS,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TTLBOS] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TTLBOS,
+               .mask           = HCR_TTLBOS,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TPMCR] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TPMCR,
+               .mask           = MDCR_EL2_TPMCR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TPM] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TPM,
+               .mask           = MDCR_EL2_TPM,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDE] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDE,
+               .mask           = MDCR_EL2_TDE,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDA] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDA,
+               .mask           = MDCR_EL2_TDA,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDOSA] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDOSA,
+               .mask           = MDCR_EL2_TDOSA,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDRA] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDRA,
+               .mask           = MDCR_EL2_TDRA,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_E2PB] = {
+               .index          = MDCR_EL2,
+               .value          = 0,            /* NOTE(review): zero under a set mask - presumably "bit clear" polarity; confirm */
+               .mask           = BIT(MDCR_EL2_E2PB_SHIFT),
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TPMS] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TPMS,
+               .mask           = MDCR_EL2_TPMS,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TTRF] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TTRF,
+               .mask           = MDCR_EL2_TTRF,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_E2TB] = {
+               .index          = MDCR_EL2,
+               .value          = 0,            /* NOTE(review): zero under a set mask - presumably "bit clear" polarity; confirm */
+               .mask           = BIT(MDCR_EL2_E2TB_SHIFT),
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDCC] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDCC,
+               .mask           = MDCR_EL2_TDCC,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+};
+
+#define MCB(id, ...)   /* Designated initializer for one combo list */ \
+       [id - __MULTIPLE_CONTROL_BITS__]        = /* index relative to the first combo ID; id is used unparenthesized */ \
+               (const enum cgt_group_id[]){                    \
+               __VA_ARGS__, __RESERVED__ /* list always ends with __RESERVED__ */ \
+               }
+
+static const enum cgt_group_id *coarse_control_combo[] = {     /* One __RESERVED__-terminated list of single controls per combined ID */
+       MCB(CGT_HCR_IMO_FMO,            CGT_HCR_IMO, CGT_HCR_FMO),
+       MCB(CGT_HCR_TID2_TID4,          CGT_HCR_TID2, CGT_HCR_TID4),
+       MCB(CGT_HCR_TTLB_TTLBIS,        CGT_HCR_TTLB, CGT_HCR_TTLBIS),
+       MCB(CGT_HCR_TTLB_TTLBOS,        CGT_HCR_TTLB, CGT_HCR_TTLBOS),
+       MCB(CGT_HCR_TVM_TRVM,           CGT_HCR_TVM, CGT_HCR_TRVM),
+       MCB(CGT_HCR_TPU_TICAB,          CGT_HCR_TPU, CGT_HCR_TICAB),
+       MCB(CGT_HCR_TPU_TOCU,           CGT_HCR_TPU, CGT_HCR_TOCU),
+       MCB(CGT_HCR_NV1_nNV2_ENSCXT,    CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT),
+       MCB(CGT_MDCR_TPM_TPMCR,         CGT_MDCR_TPM, CGT_MDCR_TPMCR),
+       MCB(CGT_MDCR_TDE_TDA,           CGT_MDCR_TDE, CGT_MDCR_TDA),
+       MCB(CGT_MDCR_TDE_TDOSA,         CGT_MDCR_TDE, CGT_MDCR_TDOSA),
+       MCB(CGT_MDCR_TDE_TDRA,          CGT_MDCR_TDE, CGT_MDCR_TDRA),
+       MCB(CGT_MDCR_TDCC_TDE_TDA,      CGT_MDCR_TDCC, CGT_MDCR_TDE, CGT_MDCR_TDA),    /* The only three-control list in this table */
+};
+
+typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *);     /* Callback type of the ccc[] entries: takes a vcpu, returns a trap_behaviour */
+
+/*
+ * Warning, maximum confusion ahead.
+ *
+ * When E2H=0, CNTHCTL_EL2[1:0] are defined as EL1PCEN:EL1PCTEN
+ * When E2H=1, CNTHCTL_EL2[11:10] are defined as EL1PTEN:EL1PCTEN
+ *
+ * Note the single letter difference? Yet, the bits have the same
+ * function despite a different layout and a different name.
+ *
+ * We don't try to reconcile this mess. We just use the E2H=0 bits
+ * to generate something that is in the E2H=1 format, and live with
+ * it. You're welcome.
+ */
+static u64 get_sanitized_cnthctl(struct kvm_vcpu *vcpu)        /* Returns the two EL1 timer access bits of CNTHCTL_EL2, always shifted up by 10 */
+{
+       u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);    /* Value obtained through __vcpu_sys_reg() for CNTHCTL_EL2 */
+
+       if (!vcpu_el2_e2h_is_set(vcpu))         /* E2H clear: keep the two bits and move them up by 10 */
+               val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
+
+       return val & ((CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN) << 10);      /* Everything but the two shifted bits is cleared */
+}
+
+static enum trap_behaviour check_cnthctl_el1pcten(struct kvm_vcpu *vcpu)       /* ccc[] callback for CGT_CNTHCTL_EL1PCTEN */
+{
+       if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCTEN << 10))     /* Shifted EL1PCTEN bit set: no forwarding */
+               return BEHAVE_HANDLE_LOCALLY;
+
+       return BEHAVE_FORWARD_ANY;      /* Bit clear: forward both reads and writes */
+}
+
+static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu)        /* ccc[] callback for CGT_CNTHCTL_EL1PTEN */
+{
+       if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCEN << 10))      /* Uses the E2H=0 name EL1PCEN, shifted like the sanitized value; set: no forwarding */
+               return BEHAVE_HANDLE_LOCALLY;
+
+       return BEHAVE_FORWARD_ANY;      /* Bit clear: forward both reads and writes */
+}
+
+#define CCC(id, fn)    /* Designated initializer for one ccc[] slot */ \
+       [id - __COMPLEX_CONDITIONS__] = fn      /* index relative to the first complex-condition ID */
+
+static const complex_condition_check ccc[] = { /* Callbacks indexed by (ID - __COMPLEX_CONDITIONS__) */
+       CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten),
+       CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten),
+};
+
+/*
+ * Bit assignment for the trap controls. We use a 64bit word with the
+ * following layout for each trapped sysreg:
+ *
+ * [9:0]       enum cgt_group_id (10 bits)
+ * [13:10]     enum fgt_group_id (4 bits)
+ * [19:14]     bit number in the FGT register (6 bits)
+ * [20]                trap polarity (1 bit)
+ * [25:21]     FG filter (5 bits)
+ * [62:26]     Unused (37 bits)
+ * [63]                RES0 - Must be zero, as lost on insertion in the xarray
+ */
+#define TC_CGT_BITS    10      /* Width of trap_config.cgt */
+#define TC_FGT_BITS    4       /* Width of trap_config.fgt */
+#define TC_FGF_BITS    5       /* Width of trap_config.fgf */
+
+union trap_config {    /* Per-sysreg trap description, viewable as one word or as bitfields */
+       u64     val;    /* Whole-word view; the field widths below add up to 64 bits */
+       struct {
+               unsigned long   cgt:TC_CGT_BITS; /* Coarse Grained Trap id */
+               unsigned long   fgt:TC_FGT_BITS; /* Fine Grained Trap id */
+               unsigned long   bit:6;           /* Bit number */
+               unsigned long   pol:1;           /* Polarity */
+               unsigned long   fgf:TC_FGF_BITS; /* Fine Grained Filter */
+               unsigned long   unused:37;       /* Unused, should be zero */
+               unsigned long   mbz:1;           /* Must Be Zero */
+       };
+};
+
+struct encoding_to_trap_config {       /* One table entry tying a sysreg encoding range to a trap configuration */
+       const u32                       encoding;       /* Start of the range (sr_start in SR_RANGE_TRAP) */
+       const u32                       end;            /* End of the range (sr_end); equals encoding for SR_TRAP entries */
+       const union trap_config         tc;             /* Trap configuration attached to the range */
+       const unsigned int              line;           /* __LINE__ recorded by SR_RANGE_TRAP for this entry */
+};
+
+#define SR_RANGE_TRAP(sr_start, sr_end, trap_id) /* Entry spanning sr_start..sr_end */ \
+       {                                                               \
+               .encoding       = sr_start,                             \
+               .end            = sr_end,                               \
+               .tc             = { /* only the cgt field is set here */ \
+                       .cgt            = trap_id,                      \
+               },                                                      \
+               .line = __LINE__, /* source line of the entry */        \
+       }
+
+#define SR_TRAP(sr, trap_id)           SR_RANGE_TRAP(sr, sr, trap_id)  /* Range whose start and end are the same encoding */
+
+/*
+ * Map encoding to trap bits for exceptions reported with EC=0x18.
+ * These must only be evaluated when running a nested hypervisor, and
+ * only when the current context is not a hypervisor context. When the
+ * trapped access matches one of the trap controls, the exception is
+ * re-injected in the nested hypervisor.
+ */
+static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
+       SR_TRAP(SYS_REVIDR_EL1,         CGT_HCR_TID1),
+       SR_TRAP(SYS_AIDR_EL1,           CGT_HCR_TID1),
+       SR_TRAP(SYS_SMIDR_EL1,          CGT_HCR_TID1),
+       SR_TRAP(SYS_CTR_EL0,            CGT_HCR_TID2),
+       SR_TRAP(SYS_CCSIDR_EL1,         CGT_HCR_TID2_TID4),
+       SR_TRAP(SYS_CCSIDR2_EL1,        CGT_HCR_TID2_TID4),
+       SR_TRAP(SYS_CLIDR_EL1,          CGT_HCR_TID2_TID4),
+       SR_TRAP(SYS_CSSELR_EL1,         CGT_HCR_TID2_TID4),
+       SR_RANGE_TRAP(SYS_ID_PFR0_EL1,
+                     sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3),
+       SR_TRAP(SYS_ICC_SGI0R_EL1,      CGT_HCR_IMO_FMO),
+       SR_TRAP(SYS_ICC_ASGI1R_EL1,     CGT_HCR_IMO_FMO),
+       SR_TRAP(SYS_ICC_SGI1R_EL1,      CGT_HCR_IMO_FMO),
+       SR_RANGE_TRAP(sys_reg(3, 0, 11, 0, 0),
+                     sys_reg(3, 0, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 1, 11, 0, 0),
+                     sys_reg(3, 1, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 2, 11, 0, 0),
+                     sys_reg(3, 2, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 3, 11, 0, 0),
+                     sys_reg(3, 3, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 4, 11, 0, 0),
+                     sys_reg(3, 4, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 5, 11, 0, 0),
+                     sys_reg(3, 5, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 6, 11, 0, 0),
+                     sys_reg(3, 6, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 7, 11, 0, 0),
+                     sys_reg(3, 7, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 0, 15, 0, 0),
+                     sys_reg(3, 0, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 1, 15, 0, 0),
+                     sys_reg(3, 1, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 2, 15, 0, 0),
+                     sys_reg(3, 2, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 3, 15, 0, 0),
+                     sys_reg(3, 3, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 4, 15, 0, 0),
+                     sys_reg(3, 4, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 5, 15, 0, 0),
+                     sys_reg(3, 5, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 6, 15, 0, 0),
+                     sys_reg(3, 6, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 7, 15, 0, 0),
+                     sys_reg(3, 7, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_TRAP(SYS_ACTLR_EL1,          CGT_HCR_TACR),
+       SR_TRAP(SYS_DC_ISW,             CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CSW,             CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CISW,            CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_IGSW,            CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_IGDSW,           CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CGSW,            CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CGDSW,           CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CIGSW,           CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CIGDSW,          CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CIVAC,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CVAC,            CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CVAP,            CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CVADP,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_IVAC,            CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CIGVAC,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CIGDVAC,         CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_IGVAC,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_IGDVAC,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGVAC,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGDVAC,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGVAP,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGDVAP,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGVADP,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGDVADP,         CGT_HCR_TPC),
+       SR_TRAP(SYS_IC_IVAU,            CGT_HCR_TPU_TOCU),
+       SR_TRAP(SYS_IC_IALLU,           CGT_HCR_TPU_TOCU),
+       SR_TRAP(SYS_IC_IALLUIS,         CGT_HCR_TPU_TICAB),
+       SR_TRAP(SYS_DC_CVAU,            CGT_HCR_TPU_TOCU),
+       SR_TRAP(OP_TLBI_RVAE1,          CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAAE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVALE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAALE1,        CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VMALLE1,        CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAE1,           CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_ASIDE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAAE1,          CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VALE1,          CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAALE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAE1NXS,       CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAAE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVALE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAALE1NXS,     CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VMALLE1NXS,     CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAE1NXS,        CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_ASIDE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAAE1NXS,       CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VALE1NXS,       CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAALE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAE1IS,        CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAAE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVALE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAALE1IS,      CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VMALLE1IS,      CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAE1IS,         CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_ASIDE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAAE1IS,        CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VALE1IS,        CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAALE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAE1ISNXS,     CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAAE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVALE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAALE1ISNXS,   CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VMALLE1ISNXS,   CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAE1ISNXS,      CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_ASIDE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAAE1ISNXS,     CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VALE1ISNXS,     CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAALE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VMALLE1OS,      CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAE1OS,         CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_ASIDE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAAE1OS,        CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VALE1OS,        CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAALE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAE1OS,        CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAAE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVALE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAALE1OS,      CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VMALLE1OSNXS,   CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAE1OSNXS,      CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_ASIDE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAAE1OSNXS,     CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VALE1OSNXS,     CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAALE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAE1OSNXS,     CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAAE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVALE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAALE1OSNXS,   CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(SYS_SCTLR_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_TTBR0_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_TTBR1_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_TCR_EL1,            CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_ESR_EL1,            CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_FAR_EL1,            CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_AFSR0_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_AFSR1_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_MAIR_EL1,           CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_AMAIR_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_CONTEXTIDR_EL1,     CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_DC_ZVA,             CGT_HCR_TDZ),
+       SR_TRAP(SYS_DC_GVA,             CGT_HCR_TDZ),
+       SR_TRAP(SYS_DC_GZVA,            CGT_HCR_TDZ),
+       SR_TRAP(SYS_LORSA_EL1,          CGT_HCR_TLOR),
+       SR_TRAP(SYS_LOREA_EL1,          CGT_HCR_TLOR),
+       SR_TRAP(SYS_LORN_EL1,           CGT_HCR_TLOR),
+       SR_TRAP(SYS_LORC_EL1,           CGT_HCR_TLOR),
+       SR_TRAP(SYS_LORID_EL1,          CGT_HCR_TLOR),
+       SR_TRAP(SYS_ERRIDR_EL1,         CGT_HCR_TERR),
+       SR_TRAP(SYS_ERRSELR_EL1,        CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXADDR_EL1,        CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXCTLR_EL1,        CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXFR_EL1,          CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC0_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC1_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC2_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC3_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXSTATUS_EL1,      CGT_HCR_TERR),
+       SR_TRAP(SYS_APIAKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APIAKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APIBKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APIBKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDAKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDAKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDBKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDBKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APGAKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APGAKEYHI_EL1,      CGT_HCR_APK),
+       /* All _EL2 registers */
+       SR_RANGE_TRAP(sys_reg(3, 4, 0, 0, 0),
+                     sys_reg(3, 4, 3, 15, 7), CGT_HCR_NV),
+       /* Skip the SP_EL1 encoding... */
+       SR_TRAP(SYS_SPSR_EL2,           CGT_HCR_NV),
+       SR_TRAP(SYS_ELR_EL2,            CGT_HCR_NV),
+       SR_RANGE_TRAP(sys_reg(3, 4, 4, 1, 1),
+                     sys_reg(3, 4, 10, 15, 7), CGT_HCR_NV),
+       SR_RANGE_TRAP(sys_reg(3, 4, 12, 0, 0),
+                     sys_reg(3, 4, 14, 15, 7), CGT_HCR_NV),
+       /* All _EL02, _EL12 registers */
+       SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0),
+                     sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV),
+       SR_RANGE_TRAP(sys_reg(3, 5, 12, 0, 0),
+                     sys_reg(3, 5, 14, 15, 7), CGT_HCR_NV),
+       SR_TRAP(OP_AT_S1E2R,            CGT_HCR_NV),
+       SR_TRAP(OP_AT_S1E2W,            CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E1R,           CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E1W,           CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E0R,           CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E0W,           CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2,         CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2,           CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1NXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1NXS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1NXS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1NXS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2NXS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2NXS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1NXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1IS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1IS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1IS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1IS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2IS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2IS,         CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1IS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1ISNXS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1ISNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1ISNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1ISNXS, CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2ISNXS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2ISNXS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1ISNXS,CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2OS,         CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1OS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1OS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1OS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1OS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1OS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2OS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2OSNXS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1OSNXS,CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1OSNXS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1OSNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1OSNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1OSNXS, CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2OSNXS,    CGT_HCR_NV),
+       SR_TRAP(OP_CPP_RCTX,            CGT_HCR_NV),
+       SR_TRAP(OP_DVP_RCTX,            CGT_HCR_NV),
+       SR_TRAP(OP_CFP_RCTX,            CGT_HCR_NV),
+       SR_TRAP(SYS_SP_EL1,             CGT_HCR_NV_nNV2),
+       SR_TRAP(SYS_VBAR_EL1,           CGT_HCR_NV1_nNV2),
+       SR_TRAP(SYS_ELR_EL1,            CGT_HCR_NV1_nNV2),
+       SR_TRAP(SYS_SPSR_EL1,           CGT_HCR_NV1_nNV2),
+       SR_TRAP(SYS_SCXTNUM_EL1,        CGT_HCR_NV1_nNV2_ENSCXT),
+       SR_TRAP(SYS_SCXTNUM_EL0,        CGT_HCR_ENSCXT),
+       SR_TRAP(OP_AT_S1E1R,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E1W,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E0R,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E0W,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E1RP,           CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E1WP,           CGT_HCR_AT),
+       SR_TRAP(SYS_ERXPFGF_EL1,        CGT_HCR_nFIEN),
+       SR_TRAP(SYS_ERXPFGCTL_EL1,      CGT_HCR_nFIEN),
+       SR_TRAP(SYS_ERXPFGCDN_EL1,      CGT_HCR_nFIEN),
+       SR_TRAP(SYS_PMCR_EL0,           CGT_MDCR_TPM_TPMCR),
+       SR_TRAP(SYS_PMCNTENSET_EL0,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCNTENCLR_EL0,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMOVSSET_EL0,       CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMOVSCLR_EL0,       CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCEID0_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCEID1_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMXEVTYPER_EL0,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMSWINC_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMSELR_EL0,         CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMXEVCNTR_EL0,      CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCCNTR_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMUSERENR_EL0,      CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMINTENSET_EL1,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMINTENCLR_EL1,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMMIR_EL1,          CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(0),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(1),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(2),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(3),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(4),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(5),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(6),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(7),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(8),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(9),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(10),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(11),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(12),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(13),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(14),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(15),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(16),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(17),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(18),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(19),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(20),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(21),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(22),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(23),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(24),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(25),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(26),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(27),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(28),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(29),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(30),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(0),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(1),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(2),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(3),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(4),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(5),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(6),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(7),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(8),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(9),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(10), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(11), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(12), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(13), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(14), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(15), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(16), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(17), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(18), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(19), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(20), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(21), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(22), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(23), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(24), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(25), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(26), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(27), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(28), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(29), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(30), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCCFILTR_EL0,      CGT_MDCR_TPM),
+       SR_TRAP(SYS_MDCCSR_EL0,         CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_MDCCINT_EL1,        CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_OSDTRRX_EL1,        CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_OSDTRTX_EL1,        CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_DBGDTR_EL0,         CGT_MDCR_TDCC_TDE_TDA),
+       /*
+        * Also covers DBGDTRRX_EL0, which has the same encoding as
+        * SYS_DBGDTRTX_EL0...
+        */
+       SR_TRAP(SYS_DBGDTRTX_EL0,       CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_MDSCR_EL1,          CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_OSECCR_EL1,         CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(15),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(15),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(15),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGCLAIMSET_EL1,    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGCLAIMCLR_EL1,    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGAUTHSTATUS_EL1,  CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_OSLAR_EL1,          CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_OSLSR_EL1,          CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_OSDLR_EL1,          CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_DBGPRCR_EL1,        CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_MDRAR_EL1,          CGT_MDCR_TDE_TDRA),
+       SR_TRAP(SYS_PMBLIMITR_EL1,      CGT_MDCR_E2PB),
+       SR_TRAP(SYS_PMBPTR_EL1,         CGT_MDCR_E2PB),
+       SR_TRAP(SYS_PMBSR_EL1,          CGT_MDCR_E2PB),
+       SR_TRAP(SYS_PMSCR_EL1,          CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSEVFR_EL1,        CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSFCR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSICR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSIDR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSIRR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSLATFR_EL1,       CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSNEVFR_EL1,       CGT_MDCR_TPMS),
+       SR_TRAP(SYS_TRFCR_EL1,          CGT_MDCR_TTRF),
+       SR_TRAP(SYS_TRBBASER_EL1,       CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBLIMITR_EL1,      CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBMAR_EL1,         CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBPTR_EL1,         CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBSR_EL1,          CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBTRG_EL1,         CGT_MDCR_E2TB),
+       SR_TRAP(SYS_CNTP_TVAL_EL0,      CGT_CNTHCTL_EL1PTEN),
+       SR_TRAP(SYS_CNTP_CVAL_EL0,      CGT_CNTHCTL_EL1PTEN),
+       SR_TRAP(SYS_CNTP_CTL_EL0,       CGT_CNTHCTL_EL1PTEN),
+       SR_TRAP(SYS_CNTPCT_EL0,         CGT_CNTHCTL_EL1PCTEN),
+       SR_TRAP(SYS_CNTPCTSS_EL0,       CGT_CNTHCTL_EL1PCTEN),
+};
+
+static DEFINE_XARRAY(sr_forward_xa);   /* File-local XArray. NOTE(review): its users are outside this excerpt; presumably filled from the encoding tables and indexed by sysreg encoding - confirm. */
+
+enum fgt_group_id {    /* Fine-grained trap register group of a table entry; SR_FGF() stores g ## _GROUP in .tc.fgt */
+       __NO_FGT_GROUP__,       /* First enumerator, value 0; presumably "no fine-grained trap group" - confirm */
+       HFGxTR_GROUP,   /* Used by the entries listed under "HFGRTR_EL2, HFGWTR_EL2" in encoding_to_fgt[] */
+       HDFGRTR_GROUP,  /* Used by the entries listed under "HDFGRTR_EL2" in encoding_to_fgt[] */
+       HDFGWTR_GROUP,  /* No user visible in this excerpt; presumably the HDFGWTR_EL2 entries - confirm */
+       HFGITR_GROUP,   /* Used by the entries listed under "HFGITR_EL2" in encoding_to_fgt[] */
+
+       /* Must be last */
+       __NR_FGT_GROUP_IDS__    /* Evaluates to the number of enumerators above (5) */
+};
+
+enum fg_filter_id {    /* Optional extra filter attached to a fine-grained trap entry; SR_FGF() stores it in .tc.fgf */
+       __NO_FGF__,     /* First enumerator, value 0; the filter SR_FGT() passes, i.e. no additional filter */
+       HCRX_FGTnXS,    /* Used by the TLBI *NXS entries, which "must be checked against HCRX_EL2.FGTnXS" */
+
+       /* Must be last */
+       __NR_FG_FILTER_IDS__    /* Evaluates to the number of enumerators above (2) */
+};
+
+#define SR_FGF(sr, g, b, p, f)                                 \
+       {                                                       \
+               .encoding       = sr,                           \
+               .end            = sr,                           \
+               .tc             = {                             \
+                       .fgt = g ## _GROUP,                     \
+                       .bit = g ## _EL2_ ## b ## _SHIFT,       \
+                       .pol = p,                               \
+                       .fgf = f,                               \
+               },                                              \
+               .line = __LINE__,                               \
+       }       /*
+                * SR_FGF(sr, g, b, p, f) expands to one designated initializer,
+                * used as an element of encoding_to_fgt[]:
+                *   sr - encoding, stored in both .encoding and .end, so the
+                *        entry describes exactly one encoding;
+                *   g  - group prefix, token-pasted into the enumerator
+                *        g_GROUP (enum fgt_group_id) for .tc.fgt and into
+                *        g_EL2_<b>_SHIFT for .tc.bit;
+                *   b  - field name, token-pasted into g_EL2_<b>_SHIFT;
+                *   p  - stored in .tc.pol; in the visible table it is 0 for
+                *        the 'n'-prefixed fields and 1 otherwise (presumably
+                *        the trap polarity of the bit - confirm);
+                *   f  - enum fg_filter_id value, stored in .tc.fgf.
+                * .line records __LINE__ of the place the macro is used.
+                */
+
+#define SR_FGT(sr, g, b, p)    SR_FGF(sr, g, b, p, __NO_FGF__) /* SR_FGF() entry with the filter fixed to __NO_FGF__ */
+
+static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
+       /* HFGRTR_EL2, HFGWTR_EL2 */
+       SR_FGT(SYS_TPIDR2_EL0,          HFGxTR, nTPIDR2_EL0, 0),
+       SR_FGT(SYS_SMPRI_EL1,           HFGxTR, nSMPRI_EL1, 0),
+       SR_FGT(SYS_ACCDATA_EL1,         HFGxTR, nACCDATA_EL1, 0),
+       SR_FGT(SYS_ERXADDR_EL1,         HFGxTR, ERXADDR_EL1, 1),
+       SR_FGT(SYS_ERXPFGCDN_EL1,       HFGxTR, ERXPFGCDN_EL1, 1),
+       SR_FGT(SYS_ERXPFGCTL_EL1,       HFGxTR, ERXPFGCTL_EL1, 1),
+       SR_FGT(SYS_ERXPFGF_EL1,         HFGxTR, ERXPFGF_EL1, 1),
+       SR_FGT(SYS_ERXMISC0_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXMISC1_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXMISC2_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXMISC3_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXSTATUS_EL1,       HFGxTR, ERXSTATUS_EL1, 1),
+       SR_FGT(SYS_ERXCTLR_EL1,         HFGxTR, ERXCTLR_EL1, 1),
+       SR_FGT(SYS_ERXFR_EL1,           HFGxTR, ERXFR_EL1, 1),
+       SR_FGT(SYS_ERRSELR_EL1,         HFGxTR, ERRSELR_EL1, 1),
+       SR_FGT(SYS_ERRIDR_EL1,          HFGxTR, ERRIDR_EL1, 1),
+       SR_FGT(SYS_ICC_IGRPEN0_EL1,     HFGxTR, ICC_IGRPENn_EL1, 1),
+       SR_FGT(SYS_ICC_IGRPEN1_EL1,     HFGxTR, ICC_IGRPENn_EL1, 1),
+       SR_FGT(SYS_VBAR_EL1,            HFGxTR, VBAR_EL1, 1),
+       SR_FGT(SYS_TTBR1_EL1,           HFGxTR, TTBR1_EL1, 1),
+       SR_FGT(SYS_TTBR0_EL1,           HFGxTR, TTBR0_EL1, 1),
+       SR_FGT(SYS_TPIDR_EL0,           HFGxTR, TPIDR_EL0, 1),
+       SR_FGT(SYS_TPIDRRO_EL0,         HFGxTR, TPIDRRO_EL0, 1),
+       SR_FGT(SYS_TPIDR_EL1,           HFGxTR, TPIDR_EL1, 1),
+       SR_FGT(SYS_TCR_EL1,             HFGxTR, TCR_EL1, 1),
+       SR_FGT(SYS_SCXTNUM_EL0,         HFGxTR, SCXTNUM_EL0, 1),
+       SR_FGT(SYS_SCXTNUM_EL1,         HFGxTR, SCXTNUM_EL1, 1),
+       SR_FGT(SYS_SCTLR_EL1,           HFGxTR, SCTLR_EL1, 1),
+       SR_FGT(SYS_REVIDR_EL1,          HFGxTR, REVIDR_EL1, 1),
+       SR_FGT(SYS_PAR_EL1,             HFGxTR, PAR_EL1, 1),
+       SR_FGT(SYS_MPIDR_EL1,           HFGxTR, MPIDR_EL1, 1),
+       SR_FGT(SYS_MIDR_EL1,            HFGxTR, MIDR_EL1, 1),
+       SR_FGT(SYS_MAIR_EL1,            HFGxTR, MAIR_EL1, 1),
+       SR_FGT(SYS_LORSA_EL1,           HFGxTR, LORSA_EL1, 1),
+       SR_FGT(SYS_LORN_EL1,            HFGxTR, LORN_EL1, 1),
+       SR_FGT(SYS_LORID_EL1,           HFGxTR, LORID_EL1, 1),
+       SR_FGT(SYS_LOREA_EL1,           HFGxTR, LOREA_EL1, 1),
+       SR_FGT(SYS_LORC_EL1,            HFGxTR, LORC_EL1, 1),
+       SR_FGT(SYS_ISR_EL1,             HFGxTR, ISR_EL1, 1),
+       SR_FGT(SYS_FAR_EL1,             HFGxTR, FAR_EL1, 1),
+       SR_FGT(SYS_ESR_EL1,             HFGxTR, ESR_EL1, 1),
+       SR_FGT(SYS_DCZID_EL0,           HFGxTR, DCZID_EL0, 1),
+       SR_FGT(SYS_CTR_EL0,             HFGxTR, CTR_EL0, 1),
+       SR_FGT(SYS_CSSELR_EL1,          HFGxTR, CSSELR_EL1, 1),
+       SR_FGT(SYS_CPACR_EL1,           HFGxTR, CPACR_EL1, 1),
+       SR_FGT(SYS_CONTEXTIDR_EL1,      HFGxTR, CONTEXTIDR_EL1, 1),
+       SR_FGT(SYS_CLIDR_EL1,           HFGxTR, CLIDR_EL1, 1),
+       SR_FGT(SYS_CCSIDR_EL1,          HFGxTR, CCSIDR_EL1, 1),
+       SR_FGT(SYS_APIBKEYLO_EL1,       HFGxTR, APIBKey, 1),
+       SR_FGT(SYS_APIBKEYHI_EL1,       HFGxTR, APIBKey, 1),
+       SR_FGT(SYS_APIAKEYLO_EL1,       HFGxTR, APIAKey, 1),
+       SR_FGT(SYS_APIAKEYHI_EL1,       HFGxTR, APIAKey, 1),
+       SR_FGT(SYS_APGAKEYLO_EL1,       HFGxTR, APGAKey, 1),
+       SR_FGT(SYS_APGAKEYHI_EL1,       HFGxTR, APGAKey, 1),
+       SR_FGT(SYS_APDBKEYLO_EL1,       HFGxTR, APDBKey, 1),
+       SR_FGT(SYS_APDBKEYHI_EL1,       HFGxTR, APDBKey, 1),
+       SR_FGT(SYS_APDAKEYLO_EL1,       HFGxTR, APDAKey, 1),
+       SR_FGT(SYS_APDAKEYHI_EL1,       HFGxTR, APDAKey, 1),
+       SR_FGT(SYS_AMAIR_EL1,           HFGxTR, AMAIR_EL1, 1),
+       SR_FGT(SYS_AIDR_EL1,            HFGxTR, AIDR_EL1, 1),
+       SR_FGT(SYS_AFSR1_EL1,           HFGxTR, AFSR1_EL1, 1),
+       SR_FGT(SYS_AFSR0_EL1,           HFGxTR, AFSR0_EL1, 1),
+       /* HFGITR_EL2 */
+       SR_FGT(OP_BRB_IALL,             HFGITR, nBRBIALL, 0),
+       SR_FGT(OP_BRB_INJ,              HFGITR, nBRBINJ, 0),
+       SR_FGT(SYS_DC_CVAC,             HFGITR, DCCVAC, 1),
+       SR_FGT(SYS_DC_CGVAC,            HFGITR, DCCVAC, 1),
+       SR_FGT(SYS_DC_CGDVAC,           HFGITR, DCCVAC, 1),
+       SR_FGT(OP_CPP_RCTX,             HFGITR, CPPRCTX, 1),
+       SR_FGT(OP_DVP_RCTX,             HFGITR, DVPRCTX, 1),
+       SR_FGT(OP_CFP_RCTX,             HFGITR, CFPRCTX, 1),
+       SR_FGT(OP_TLBI_VAALE1,          HFGITR, TLBIVAALE1, 1),
+       SR_FGT(OP_TLBI_VALE1,           HFGITR, TLBIVALE1, 1),
+       SR_FGT(OP_TLBI_VAAE1,           HFGITR, TLBIVAAE1, 1),
+       SR_FGT(OP_TLBI_ASIDE1,          HFGITR, TLBIASIDE1, 1),
+       SR_FGT(OP_TLBI_VAE1,            HFGITR, TLBIVAE1, 1),
+       SR_FGT(OP_TLBI_VMALLE1,         HFGITR, TLBIVMALLE1, 1),
+       SR_FGT(OP_TLBI_RVAALE1,         HFGITR, TLBIRVAALE1, 1),
+       SR_FGT(OP_TLBI_RVALE1,          HFGITR, TLBIRVALE1, 1),
+       SR_FGT(OP_TLBI_RVAAE1,          HFGITR, TLBIRVAAE1, 1),
+       SR_FGT(OP_TLBI_RVAE1,           HFGITR, TLBIRVAE1, 1),
+       SR_FGT(OP_TLBI_RVAALE1IS,       HFGITR, TLBIRVAALE1IS, 1),
+       SR_FGT(OP_TLBI_RVALE1IS,        HFGITR, TLBIRVALE1IS, 1),
+       SR_FGT(OP_TLBI_RVAAE1IS,        HFGITR, TLBIRVAAE1IS, 1),
+       SR_FGT(OP_TLBI_RVAE1IS,         HFGITR, TLBIRVAE1IS, 1),
+       SR_FGT(OP_TLBI_VAALE1IS,        HFGITR, TLBIVAALE1IS, 1),
+       SR_FGT(OP_TLBI_VALE1IS,         HFGITR, TLBIVALE1IS, 1),
+       SR_FGT(OP_TLBI_VAAE1IS,         HFGITR, TLBIVAAE1IS, 1),
+       SR_FGT(OP_TLBI_ASIDE1IS,        HFGITR, TLBIASIDE1IS, 1),
+       SR_FGT(OP_TLBI_VAE1IS,          HFGITR, TLBIVAE1IS, 1),
+       SR_FGT(OP_TLBI_VMALLE1IS,       HFGITR, TLBIVMALLE1IS, 1),
+       SR_FGT(OP_TLBI_RVAALE1OS,       HFGITR, TLBIRVAALE1OS, 1),
+       SR_FGT(OP_TLBI_RVALE1OS,        HFGITR, TLBIRVALE1OS, 1),
+       SR_FGT(OP_TLBI_RVAAE1OS,        HFGITR, TLBIRVAAE1OS, 1),
+       SR_FGT(OP_TLBI_RVAE1OS,         HFGITR, TLBIRVAE1OS, 1),
+       SR_FGT(OP_TLBI_VAALE1OS,        HFGITR, TLBIVAALE1OS, 1),
+       SR_FGT(OP_TLBI_VALE1OS,         HFGITR, TLBIVALE1OS, 1),
+       SR_FGT(OP_TLBI_VAAE1OS,         HFGITR, TLBIVAAE1OS, 1),
+       SR_FGT(OP_TLBI_ASIDE1OS,        HFGITR, TLBIASIDE1OS, 1),
+       SR_FGT(OP_TLBI_VAE1OS,          HFGITR, TLBIVAE1OS, 1),
+       SR_FGT(OP_TLBI_VMALLE1OS,       HFGITR, TLBIVMALLE1OS, 1),
+       /* nXS variants must be checked against HCRX_EL2.FGTnXS */
+       SR_FGF(OP_TLBI_VAALE1NXS,       HFGITR, TLBIVAALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VALE1NXS,        HFGITR, TLBIVALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAAE1NXS,        HFGITR, TLBIVAAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_ASIDE1NXS,       HFGITR, TLBIASIDE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAE1NXS,         HFGITR, TLBIVAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VMALLE1NXS,      HFGITR, TLBIVMALLE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAALE1NXS,      HFGITR, TLBIRVAALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVALE1NXS,       HFGITR, TLBIRVALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAAE1NXS,       HFGITR, TLBIRVAAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAE1NXS,        HFGITR, TLBIRVAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAALE1ISNXS,    HFGITR, TLBIRVAALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVALE1ISNXS,     HFGITR, TLBIRVALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAAE1ISNXS,     HFGITR, TLBIRVAAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAE1ISNXS,      HFGITR, TLBIRVAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAALE1ISNXS,     HFGITR, TLBIVAALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VALE1ISNXS,      HFGITR, TLBIVALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAAE1ISNXS,      HFGITR, TLBIVAAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_ASIDE1ISNXS,     HFGITR, TLBIASIDE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAE1ISNXS,       HFGITR, TLBIVAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VMALLE1ISNXS,    HFGITR, TLBIVMALLE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAALE1OSNXS,    HFGITR, TLBIRVAALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVALE1OSNXS,     HFGITR, TLBIRVALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAAE1OSNXS,     HFGITR, TLBIRVAAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAE1OSNXS,      HFGITR, TLBIRVAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAALE1OSNXS,     HFGITR, TLBIVAALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VALE1OSNXS,      HFGITR, TLBIVALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAAE1OSNXS,      HFGITR, TLBIVAAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_ASIDE1OSNXS,     HFGITR, TLBIASIDE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAE1OSNXS,       HFGITR, TLBIVAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VMALLE1OSNXS,    HFGITR, TLBIVMALLE1OS, 1, HCRX_FGTnXS),
+       SR_FGT(OP_AT_S1E1WP,            HFGITR, ATS1E1WP, 1),
+       SR_FGT(OP_AT_S1E1RP,            HFGITR, ATS1E1RP, 1),
+       SR_FGT(OP_AT_S1E0W,             HFGITR, ATS1E0W, 1),
+       SR_FGT(OP_AT_S1E0R,             HFGITR, ATS1E0R, 1),
+       SR_FGT(OP_AT_S1E1W,             HFGITR, ATS1E1W, 1),
+       SR_FGT(OP_AT_S1E1R,             HFGITR, ATS1E1R, 1),
+       SR_FGT(SYS_DC_ZVA,              HFGITR, DCZVA, 1),
+       SR_FGT(SYS_DC_GVA,              HFGITR, DCZVA, 1),
+       SR_FGT(SYS_DC_GZVA,             HFGITR, DCZVA, 1),
+       SR_FGT(SYS_DC_CIVAC,            HFGITR, DCCIVAC, 1),
+       SR_FGT(SYS_DC_CIGVAC,           HFGITR, DCCIVAC, 1),
+       SR_FGT(SYS_DC_CIGDVAC,          HFGITR, DCCIVAC, 1),
+       SR_FGT(SYS_DC_CVADP,            HFGITR, DCCVADP, 1),
+       SR_FGT(SYS_DC_CGVADP,           HFGITR, DCCVADP, 1),
+       SR_FGT(SYS_DC_CGDVADP,          HFGITR, DCCVADP, 1),
+       SR_FGT(SYS_DC_CVAP,             HFGITR, DCCVAP, 1),
+       SR_FGT(SYS_DC_CGVAP,            HFGITR, DCCVAP, 1),
+       SR_FGT(SYS_DC_CGDVAP,           HFGITR, DCCVAP, 1),
+       SR_FGT(SYS_DC_CVAU,             HFGITR, DCCVAU, 1),
+       SR_FGT(SYS_DC_CISW,             HFGITR, DCCISW, 1),
+       SR_FGT(SYS_DC_CIGSW,            HFGITR, DCCISW, 1),
+       SR_FGT(SYS_DC_CIGDSW,           HFGITR, DCCISW, 1),
+       SR_FGT(SYS_DC_CSW,              HFGITR, DCCSW, 1),
+       SR_FGT(SYS_DC_CGSW,             HFGITR, DCCSW, 1),
+       SR_FGT(SYS_DC_CGDSW,            HFGITR, DCCSW, 1),
+       SR_FGT(SYS_DC_ISW,              HFGITR, DCISW, 1),
+       SR_FGT(SYS_DC_IGSW,             HFGITR, DCISW, 1),
+       SR_FGT(SYS_DC_IGDSW,            HFGITR, DCISW, 1),
+       SR_FGT(SYS_DC_IVAC,             HFGITR, DCIVAC, 1),
+       SR_FGT(SYS_DC_IGVAC,            HFGITR, DCIVAC, 1),
+       SR_FGT(SYS_DC_IGDVAC,           HFGITR, DCIVAC, 1),
+       SR_FGT(SYS_IC_IVAU,             HFGITR, ICIVAU, 1),
+       SR_FGT(SYS_IC_IALLU,            HFGITR, ICIALLU, 1),
+       SR_FGT(SYS_IC_IALLUIS,          HFGITR, ICIALLUIS, 1),
+       /* HDFGRTR_EL2 */
+       SR_FGT(SYS_PMBIDR_EL1,          HDFGRTR, PMBIDR_EL1, 1),
+       SR_FGT(SYS_PMSNEVFR_EL1,        HDFGRTR, nPMSNEVFR_EL1, 0),
+       SR_FGT(SYS_BRBINF_EL1(0),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(1),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(2),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(3),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(4),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(5),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(6),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(7),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(8),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(9),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(10),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(11),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(12),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(13),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(14),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(15),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(16),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(17),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(18),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(19),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(20),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(21),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(22),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(23),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(24),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(25),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(26),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(27),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(28),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(29),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(30),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(31),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINFINJ_EL1,       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(0),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(1),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(2),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(3),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(4),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(5),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(6),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(7),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(8),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(9),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(10),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(11),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(12),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(13),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(14),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(15),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(16),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(17),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(18),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(19),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(20),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(21),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(22),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(23),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(24),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(25),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(26),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(27),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(28),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(29),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(30),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(31),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRCINJ_EL1,       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(0),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(1),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(2),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(3),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(4),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(5),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(6),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(7),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(8),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(9),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(10),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(11),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(12),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(13),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(14),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(15),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(16),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(17),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(18),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(19),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(20),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(21),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(22),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(23),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(24),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(25),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(26),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(27),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(28),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(29),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(30),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(31),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGTINJ_EL1,       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTS_EL1,           HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBCR_EL1,           HDFGRTR, nBRBCTL, 0),
+       SR_FGT(SYS_BRBFCR_EL1,          HDFGRTR, nBRBCTL, 0),
+       SR_FGT(SYS_BRBIDR0_EL1,         HDFGRTR, nBRBIDR, 0),
+       SR_FGT(SYS_PMCEID0_EL0,         HDFGRTR, PMCEIDn_EL0, 1),
+       SR_FGT(SYS_PMCEID1_EL0,         HDFGRTR, PMCEIDn_EL0, 1),
+       SR_FGT(SYS_PMUSERENR_EL0,       HDFGRTR, PMUSERENR_EL0, 1),
+       SR_FGT(SYS_TRBTRG_EL1,          HDFGRTR, TRBTRG_EL1, 1),
+       SR_FGT(SYS_TRBSR_EL1,           HDFGRTR, TRBSR_EL1, 1),
+       SR_FGT(SYS_TRBPTR_EL1,          HDFGRTR, TRBPTR_EL1, 1),
+       SR_FGT(SYS_TRBMAR_EL1,          HDFGRTR, TRBMAR_EL1, 1),
+       SR_FGT(SYS_TRBLIMITR_EL1,       HDFGRTR, TRBLIMITR_EL1, 1),
+       SR_FGT(SYS_TRBIDR_EL1,          HDFGRTR, TRBIDR_EL1, 1),
+       SR_FGT(SYS_TRBBASER_EL1,        HDFGRTR, TRBBASER_EL1, 1),
+       SR_FGT(SYS_TRCVICTLR,           HDFGRTR, TRCVICTLR, 1),
+       SR_FGT(SYS_TRCSTATR,            HDFGRTR, TRCSTATR, 1),
+       SR_FGT(SYS_TRCSSCSR(0),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(1),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(2),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(3),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(4),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(5),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(6),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(7),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSEQSTR,           HDFGRTR, TRCSEQSTR, 1),
+       SR_FGT(SYS_TRCPRGCTLR,          HDFGRTR, TRCPRGCTLR, 1),
+       SR_FGT(SYS_TRCOSLSR,            HDFGRTR, TRCOSLSR, 1),
+       SR_FGT(SYS_TRCIMSPEC(0),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(1),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(2),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(3),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(4),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(5),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(6),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(7),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCDEVARCH,          HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCDEVID,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR0,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR1,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR2,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR3,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR4,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR5,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR6,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR7,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR8,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR9,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR10,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR11,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR12,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR13,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCCNTVR(0),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCNTVR(1),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCNTVR(2),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCNTVR(3),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCLAIMCLR,         HDFGRTR, TRCCLAIM, 1),
+       SR_FGT(SYS_TRCCLAIMSET,         HDFGRTR, TRCCLAIM, 1),
+       SR_FGT(SYS_TRCAUXCTLR,          HDFGRTR, TRCAUXCTLR, 1),
+       SR_FGT(SYS_TRCAUTHSTATUS,       HDFGRTR, TRCAUTHSTATUS, 1),
+       SR_FGT(SYS_TRCACATR(0),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(1),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(2),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(3),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(4),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(5),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(6),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(7),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(8),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(9),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(10),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(11),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(12),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(13),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(14),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(15),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(0),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(1),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(2),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(3),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(4),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(5),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(6),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(7),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(8),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(9),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(10),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(11),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(12),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(13),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(14),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(15),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCBBCTLR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCCCTLR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCCTLR0,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCCTLR1,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(0),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(1),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(2),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(3),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(4),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(5),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(6),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(7),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(0),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(1),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(2),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(3),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(0),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(1),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(2),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(3),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCONFIGR,          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEVENTCTL0R,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEVENTCTL1R,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(0),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(1),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(2),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(3),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCQCTLR,            HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(2),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(3),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(4),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(5),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(6),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(7),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(8),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(9),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(10),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(11),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(12),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(13),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(14),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(15),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(16),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(17),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(18),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(19),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(20),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(21),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(22),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(23),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(24),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(25),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(26),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(27),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(28),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(29),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(30),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(31),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSR,              HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQEVR(0),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQEVR(1),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQEVR(2),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQRSTEVR,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(0),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(1),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(2),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(3),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(4),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(5),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(6),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(7),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(0),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(1),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(2),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(3),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(4),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(5),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(6),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(7),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSTALLCTLR,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSYNCPR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCTRACEIDR,         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCTSCTLR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVIIECTLR,         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVIPCSSCTLR,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVISSCTLR,         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCCTLR0,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCCTLR1,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(0),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(1),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(2),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(3),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(4),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(5),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(6),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(7),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_PMSLATFR_EL1,        HDFGRTR, PMSLATFR_EL1, 1),
+       SR_FGT(SYS_PMSIRR_EL1,          HDFGRTR, PMSIRR_EL1, 1),
+       SR_FGT(SYS_PMSIDR_EL1,          HDFGRTR, PMSIDR_EL1, 1),
+       SR_FGT(SYS_PMSICR_EL1,          HDFGRTR, PMSICR_EL1, 1),
+       SR_FGT(SYS_PMSFCR_EL1,          HDFGRTR, PMSFCR_EL1, 1),
+       SR_FGT(SYS_PMSEVFR_EL1,         HDFGRTR, PMSEVFR_EL1, 1),
+       SR_FGT(SYS_PMSCR_EL1,           HDFGRTR, PMSCR_EL1, 1),
+       SR_FGT(SYS_PMBSR_EL1,           HDFGRTR, PMBSR_EL1, 1),
+       SR_FGT(SYS_PMBPTR_EL1,          HDFGRTR, PMBPTR_EL1, 1),
+       SR_FGT(SYS_PMBLIMITR_EL1,       HDFGRTR, PMBLIMITR_EL1, 1),
+       SR_FGT(SYS_PMMIR_EL1,           HDFGRTR, PMMIR_EL1, 1),
+       SR_FGT(SYS_PMSELR_EL0,          HDFGRTR, PMSELR_EL0, 1),
+       SR_FGT(SYS_PMOVSCLR_EL0,        HDFGRTR, PMOVS, 1),
+       SR_FGT(SYS_PMOVSSET_EL0,        HDFGRTR, PMOVS, 1),
+       SR_FGT(SYS_PMINTENCLR_EL1,      HDFGRTR, PMINTEN, 1),
+       SR_FGT(SYS_PMINTENSET_EL1,      HDFGRTR, PMINTEN, 1),
+       SR_FGT(SYS_PMCNTENCLR_EL0,      HDFGRTR, PMCNTEN, 1),
+       SR_FGT(SYS_PMCNTENSET_EL0,      HDFGRTR, PMCNTEN, 1),
+       SR_FGT(SYS_PMCCNTR_EL0,         HDFGRTR, PMCCNTR_EL0, 1),
+       SR_FGT(SYS_PMCCFILTR_EL0,       HDFGRTR, PMCCFILTR_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(0),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(1),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(2),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(3),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(4),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(5),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(6),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(7),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(8),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(9),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(10),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(11),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(12),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(13),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(14),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(15),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(16),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(17),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(18),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(19),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(20),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(21),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(22),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(23),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(24),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(25),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(26),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(27),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(28),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(29),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(30),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(0),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(1),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(2),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(3),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(4),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(5),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(6),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(7),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(8),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(9),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(10),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(11),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(12),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(13),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(14),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(15),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(16),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(17),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(18),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(19),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(20),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(21),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(22),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(23),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(24),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(25),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(26),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(27),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(28),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(29),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(30),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_OSDLR_EL1,           HDFGRTR, OSDLR_EL1, 1),
+       SR_FGT(SYS_OSECCR_EL1,          HDFGRTR, OSECCR_EL1, 1),
+       SR_FGT(SYS_OSLSR_EL1,           HDFGRTR, OSLSR_EL1, 1),
+       SR_FGT(SYS_DBGPRCR_EL1,         HDFGRTR, DBGPRCR_EL1, 1),
+       SR_FGT(SYS_DBGAUTHSTATUS_EL1,   HDFGRTR, DBGAUTHSTATUS_EL1, 1),
+       SR_FGT(SYS_DBGCLAIMSET_EL1,     HDFGRTR, DBGCLAIM, 1),
+       SR_FGT(SYS_DBGCLAIMCLR_EL1,     HDFGRTR, DBGCLAIM, 1),
+       SR_FGT(SYS_MDSCR_EL1,           HDFGRTR, MDSCR_EL1, 1),
+       /*
+        * The trap bits capture *64* debug registers per bit, but the
+        * ARM ARM only describes the encoding for the first 16, and
+        * we don't really support more than that anyway.
+        */
+       SR_FGT(SYS_DBGWVRn_EL1(0),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(1),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(2),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(3),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(4),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(5),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(6),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(7),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(8),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(9),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(10),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(11),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(12),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(13),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(14),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(15),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(0),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(1),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(2),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(3),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(4),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(5),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(6),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(7),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(8),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(9),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(10),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(11),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(12),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(13),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(14),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(15),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(0),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(1),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(2),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(3),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(4),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(5),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(6),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(7),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(8),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(9),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(10),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(11),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(12),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(13),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(14),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(15),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(0),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(1),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(2),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(3),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(4),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(5),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(6),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(7),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(8),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(9),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(10),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(11),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(12),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(13),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(14),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(15),     HDFGRTR, DBGBCRn_EL1, 1),
+       /*
+        * HDFGWTR_EL2
+        *
+        * Although HDFGRTR_EL2 and HDFGWTR_EL2 registers largely
+        * overlap in their bit assignment, there are a number of bits
+        * that are RES0 on one side, and an actual trap bit on the
+        * other.  The policy chosen here is to describe all the
+        * read-side mappings, and only the write-side mappings that
+        * differ from the read side, and the trap handler will pick
+        * the correct shadow register based on the access type.
+        */
+       SR_FGT(SYS_TRFCR_EL1,           HDFGWTR, TRFCR_EL1, 1),
+       SR_FGT(SYS_TRCOSLAR,            HDFGWTR, TRCOSLAR, 1),
+       SR_FGT(SYS_PMCR_EL0,            HDFGWTR, PMCR_EL0, 1),
+       SR_FGT(SYS_PMSWINC_EL0,         HDFGWTR, PMSWINC_EL0, 1),
+       SR_FGT(SYS_OSLAR_EL1,           HDFGWTR, OSLAR_EL1, 1),
+};
+
+/*
+ * Fetch whatever sr_forward_xa holds for @sysreg and hand it back as a
+ * union trap_config whose raw value is the xa_to_value() decoding of the
+ * loaded entry.
+ */
+static union trap_config get_trap_config(u32 sysreg)
+{
+       void *entry = xa_load(&sr_forward_xa, sysreg);
+       union trap_config tc = { .val = xa_to_value(entry) };
+
+       return tc;
+}
+
+/*
+ * Report a problem with one trap table entry through kvm_err(): the
+ * caller-provided @type label, the line recorded in @tc, the
+ * Op0/Op1/CRn/CRm/Op2 fields of both ends of the entry's encoding range,
+ * and the error code @err.
+ */
+static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc,
+                                      const char *type, int err)
+{
+       kvm_err("%s line %d encoding range (%d, %d, %d, %d, %d) - (%d, %d, %d, %d, %d) (err=%d)\n",
+               type, tc->line,
+               /* First encoding of the range */
+               sys_reg_Op0(tc->encoding),
+               sys_reg_Op1(tc->encoding),
+               sys_reg_CRn(tc->encoding),
+               sys_reg_CRm(tc->encoding),
+               sys_reg_Op2(tc->encoding),
+               /* Last encoding of the range */
+               sys_reg_Op0(tc->end),
+               sys_reg_Op1(tc->end),
+               sys_reg_CRn(tc->end),
+               sys_reg_CRm(tc->end),
+               sys_reg_Op2(tc->end),
+               err);
+}
+
+/*
+ * Build sr_forward_xa, the sysreg encoding -> trap_config lookup table.
+ *
+ * - Every encoding_to_cgt[] entry is stored, as a range when ->encoding
+ *   and ->end differ. A single-encoding entry that replaces an existing
+ *   one is reported as a duplicate.
+ * - If the ARM64_HAS_FGT capability is present, every encoding_to_fgt[]
+ *   entry is OR-ed into whatever is already stored for its encoding.
+ * - Finally, each coarse_control_combo[] list is checked so that no
+ *   multiple-control-bits group refers to an id that is itself at or
+ *   above __MULTIPLE_CONTROL_BITS__.
+ *
+ * Problems are logged and the walk carries on, so that a single pass
+ * reports as much as possible. Returns 0 on success, or the last error
+ * recorded (-EINVAL or an xarray error), in which case sr_forward_xa is
+ * destroyed.
+ */
+int __init populate_nv_trap_config(void)
+{
+       int ret = 0;
+
+       /* Compile-time checks on the trap_config layout and id spaces */
+       BUILD_BUG_ON(sizeof(union trap_config) != sizeof(void *));
+       BUILD_BUG_ON(__NR_CGT_GROUP_IDS__ > BIT(TC_CGT_BITS));
+       BUILD_BUG_ON(__NR_FGT_GROUP_IDS__ > BIT(TC_FGT_BITS));
+       BUILD_BUG_ON(__NR_FG_FILTER_IDS__ > BIT(TC_FGF_BITS));
+
+       for (int i = 0; i < ARRAY_SIZE(encoding_to_cgt); i++) {
+               const struct encoding_to_trap_config *cgt = &encoding_to_cgt[i];
+               void *prev;
+
+               /*
+                * NOTE(review): bit 63 is presumably unavailable because
+                * the value is stored via xa_mk_value() - confirm.
+                */
+               if (cgt->tc.val & BIT(63)) {
+                       kvm_err("CGT[%d] has MBZ bit set\n", i);
+                       ret = -EINVAL;
+               }
+
+               if (cgt->encoding != cgt->end) {
+                       prev = xa_store_range(&sr_forward_xa,
+                                             cgt->encoding, cgt->end,
+                                             xa_mk_value(cgt->tc.val),
+                                             GFP_KERNEL);
+               } else {
+                       prev = xa_store(&sr_forward_xa, cgt->encoding,
+                                       xa_mk_value(cgt->tc.val), GFP_KERNEL);
+                       /* A previous, non-error entry means a duplicate */
+                       if (prev && !xa_is_err(prev)) {
+                               ret = -EINVAL;
+                               print_nv_trap_error(cgt, "Duplicate CGT", ret);
+                       }
+               }
+
+               if (xa_is_err(prev)) {
+                       ret = xa_err(prev);
+                       print_nv_trap_error(cgt, "Failed CGT insertion", ret);
+               }
+       }
+
+       kvm_info("nv: %zu coarse grained trap handlers\n",
+                ARRAY_SIZE(encoding_to_cgt));
+
+       /* Without FGT there is nothing fine-grained to describe */
+       if (!cpus_have_final_cap(ARM64_HAS_FGT))
+               goto check_mcb;
+
+       for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) {
+               const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i];
+               union trap_config tc;
+               void *prev;
+
+               if (fgt->tc.fgt >= __NR_FGT_GROUP_IDS__) {
+                       ret = -EINVAL;
+                       print_nv_trap_error(fgt, "Invalid FGT", ret);
+               }
+
+               /* Start from what the CGT pass (if any) stored */
+               tc = get_trap_config(fgt->encoding);
+
+               if (tc.fgt) {
+                       ret = -EINVAL;
+                       print_nv_trap_error(fgt, "Duplicate FGT", ret);
+               }
+
+               tc.val |= fgt->tc.val;
+               prev = xa_store(&sr_forward_xa, fgt->encoding,
+                               xa_mk_value(tc.val), GFP_KERNEL);
+               /* Do not silently lose an entry if the store failed */
+               if (xa_is_err(prev)) {
+                       ret = xa_err(prev);
+                       print_nv_trap_error(fgt, "Failed FGT insertion", ret);
+               }
+       }
+
+       kvm_info("nv: %zu fine grained trap handlers\n",
+                ARRAY_SIZE(encoding_to_fgt));
+
+check_mcb:
+       for (int id = __MULTIPLE_CONTROL_BITS__; id < __COMPLEX_CONDITIONS__; id++) {
+               const enum cgt_group_id *cgids;
+
+               cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+
+               /* Each list is terminated by __RESERVED__ */
+               for (int i = 0; cgids[i] != __RESERVED__; i++) {
+                       if (cgids[i] >= __MULTIPLE_CONTROL_BITS__) {
+                               kvm_err("Recursive MCB %d/%d\n", id, cgids[i]);
+                               ret = -EINVAL;
+                       }
+               }
+       }
+
+       /* On any error, drop the partially built table */
+       if (ret)
+               xa_destroy(&sr_forward_xa);
+
+       return ret;
+}
+
+/*
+ * Evaluate one trap_bits descriptor against the vcpu: read the register
+ * selected by @tb->index and, if its bits under @tb->mask equal
+ * @tb->value, report @tb->behaviour on top of BEHAVE_HANDLE_LOCALLY.
+ * Otherwise report BEHAVE_HANDLE_LOCALLY alone.
+ */
+static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu,
+                                        const struct trap_bits *tb)
+{
+       u64 reg = __vcpu_sys_reg(vcpu, tb->index);
+
+       if ((reg & tb->mask) != tb->value)
+               return BEHAVE_HANDLE_LOCALLY;
+
+       return BEHAVE_HANDLE_LOCALLY | tb->behaviour;
+}
+
+/*
+ * Accumulate into @b the trap behaviour that coarse-grained group @id
+ * yields for @vcpu, and return the result.
+ *
+ * - Ids below __MULTIPLE_CONTROL_BITS__ (other than __RESERVED__, which
+ *   contributes nothing) are resolved through coarse_trap_bits[id].
+ * - Ids from __MULTIPLE_CONTROL_BITS__ up to __COMPLEX_CONDITIONS__ - 1
+ *   expand to a __RESERVED__-terminated list in coarse_control_combo[],
+ *   each element of which is evaluated recursively.
+ * - Any other id is dispatched to the ccc[] callback at index
+ *   id - __COMPLEX_CONDITIONS__.
+ */
+static enum trap_behaviour __compute_trap_behaviour(struct kvm_vcpu *vcpu,
+                                                   const enum cgt_group_id id,
+                                                   enum trap_behaviour b)
+{
+       switch (id) {
+               const enum cgt_group_id *cgids;
+
+       case __RESERVED__ ... __MULTIPLE_CONTROL_BITS__ - 1:
+               /* Single control bit: one descriptor to evaluate */
+               if (likely(id != __RESERVED__))
+                       b |= get_behaviour(vcpu, &coarse_trap_bits[id]);
+               break;
+       case __MULTIPLE_CONTROL_BITS__ ... __COMPLEX_CONDITIONS__ - 1:
+               /* Yes, this is recursive. Don't do anything stupid. */
+               cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+               for (int i = 0; cgids[i] != __RESERVED__; i++)
+                       b |= __compute_trap_behaviour(vcpu, cgids[i], b);
+               break;
+       default:
+               /* NOTE(review): the ARRAY_SIZE() test presumably covers an empty ccc[] - confirm */
+               if (ARRAY_SIZE(ccc))
+                       b |= ccc[id -  __COMPLEX_CONDITIONS__](vcpu);
+               break;
+       }
+
+       return b;
+}
+
+/*
+ * Compute the trap behaviour for the coarse-grained group recorded in
+ * @tc, starting the accumulation from BEHAVE_HANDLE_LOCALLY.
+ */
+static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu,
+                                                 const union trap_config tc)
+{
+       return __compute_trap_behaviour(vcpu, tc.cgt, BEHAVE_HANDLE_LOCALLY);
+}
+
+/*
+ * Return true when bit @tc.bit of @val has the value given by @tc.pol.
+ */
+static bool check_fgt_bit(u64 val, const union trap_config tc)
+{
+       u64 bit = (val >> tc.bit) & 1;
+
+       return bit == tc.pol;
+}
+
+#define sanitised_sys_reg(vcpu, reg)                   \
+       ({                                              \
+               u64 __val;                              \
+               __val = __vcpu_sys_reg(vcpu, reg);      \
+               __val &= ~__ ## reg ## _RES0;           \
+               (__val);                                \
+       })
+
+/*
+ * Decide whether the trapped system register access described by the
+ * vcpu's ESR has to be injected back into the guest with
+ * kvm_inject_nested_sync() rather than being handled by the caller.
+ *
+ * Returns true if the exception was injected. Returns false otherwise,
+ * which includes: the vcpu has no NV, is_hyp_ctxt() is true, no trap
+ * configuration is recorded for the sysreg, or neither the fine-grained
+ * nor the coarse-grained checks ask for forwarding.
+ */
+bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
+{
+       union trap_config tc;
+       enum trap_behaviour b;
+       bool is_read;
+       u32 sysreg;
+       u64 esr, val;
+
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               return false;
+
+       /* Extract the sysreg encoding and access direction from the ESR */
+       esr = kvm_vcpu_get_esr(vcpu);
+       sysreg = esr_sys64_to_sysreg(esr);
+       is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+
+       tc = get_trap_config(sysreg);
+
+       /*
+        * A value of 0 for the whole entry means that we know nothing
+        * for this sysreg, and that it cannot be re-injected into the
+        * nested hypervisor. In this situation, let's cut it short.
+        *
+        * Note that ultimately, we could also make use of the xarray
+        * to store the index of the sysreg in the local descriptor
+        * array, avoiding another search... Hint, hint...
+        */
+       if (!tc.val)
+               return false;
+
+       /*
+        * Load into val the sanitised fine-grained trap register that
+        * matches the FGT group and, where relevant, the access
+        * direction. val is left unset for __NO_FGT_GROUP__, and is
+        * only consumed below when tc.fgt is not __NO_FGT_GROUP__.
+        */
+       switch ((enum fgt_group_id)tc.fgt) {
+       case __NO_FGT_GROUP__:
+               break;
+
+       case HFGxTR_GROUP:
+               if (is_read)
+                       val = sanitised_sys_reg(vcpu, HFGRTR_EL2);
+               else
+                       val = sanitised_sys_reg(vcpu, HFGWTR_EL2);
+               break;
+
+       /* Both debug groups pick the register from the access direction */
+       case HDFGRTR_GROUP:
+       case HDFGWTR_GROUP:
+               if (is_read)
+                       val = sanitised_sys_reg(vcpu, HDFGRTR_EL2);
+               else
+                       val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
+               break;
+
+       case HFGITR_GROUP:
+               val = sanitised_sys_reg(vcpu, HFGITR_EL2);
+               switch (tc.fgf) {
+                       u64 tmp;
+
+               case __NO_FGF__:
+                       break;
+
+               case HCRX_FGTnXS:
+                       /*
+                        * With HCRX_EL2.FGTnXS set, drop the FGT group so
+                        * that only the coarse-grained check below applies.
+                        */
+                       tmp = sanitised_sys_reg(vcpu, HCRX_EL2);
+                       if (tmp & HCRX_EL2_FGTnXS)
+                               tc.fgt = __NO_FGT_GROUP__;
+               }
+               break;
+
+       case __NR_FGT_GROUP_IDS__:
+               /* Something is really wrong, bail out */
+               WARN_ONCE(1, "__NR_FGT_GROUP_IDS__");
+               return false;
+       }
+
+       /* Fine-grained trap bit matches its polarity: forward */
+       if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(val, tc))
+               goto inject;
+
+       /* Otherwise fall back to the coarse-grained trap controls */
+       b = compute_trap_behaviour(vcpu, tc);
+
+       if (((b & BEHAVE_FORWARD_READ) && is_read) ||
+           ((b & BEHAVE_FORWARD_WRITE) && !is_read))
+               goto inject;
+
+       return false;
+
+inject:
+       trace_kvm_forward_sysreg_trap(vcpu, sysreg, is_read);
+
+       kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+       return true;
+}
+
 static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
 {
        u64 mode = spsr & PSR_MODE_MASK;
index 20280a5..95f6945 100644 (file)
@@ -884,21 +884,6 @@ u32 __attribute_const__ kvm_target_cpu(void)
        return KVM_ARM_TARGET_GENERIC_V8;
 }
 
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
-{
-       u32 target = kvm_target_cpu();
-
-       memset(init, 0, sizeof(*init));
-
-       /*
-        * For now, we don't return any features.
-        * In future, we might use features to return target
-        * specific features available for the preferred
-        * target type.
-        */
-       init->target = (__u32)target;
-}
-
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        return -EINVAL;
index 6dcd660..617ae6d 100644 (file)
@@ -222,7 +222,33 @@ static int kvm_handle_eret(struct kvm_vcpu *vcpu)
        if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
                return kvm_handle_ptrauth(vcpu);
 
-       kvm_emulate_nested_eret(vcpu);
+       /*
+        * If we got here, two possibilities:
+        *
+        * - the guest is in EL2, and we need to fully emulate ERET
+        *
+        * - the guest is in EL1, and we need to reinject the
+        *   exception into the L1 hypervisor.
+        *
+        * If KVM ever traps ERET for its own use, we'll have to
+        * revisit this.
+        */
+       if (is_hyp_ctxt(vcpu))
+               kvm_emulate_nested_eret(vcpu);
+       else
+               kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+
+       return 1;
+}
+
+static int handle_svc(struct kvm_vcpu *vcpu)
+{
+       /*
+        * So far, SVC traps only for NV via HFGITR_EL2. A SVC from a
+        * 32bit guest would be caught by vcpu_mode_is_bad_32bit(), so
+        * we should only have to deal with a 64 bit exception.
+        */
+       kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
        return 1;
 }
 
@@ -239,6 +265,7 @@ static exit_handle_fn arm_exit_handlers[] = {
        [ESR_ELx_EC_SMC32]      = handle_smc,
        [ESR_ELx_EC_HVC64]      = handle_hvc,
        [ESR_ELx_EC_SMC64]      = handle_smc,
+       [ESR_ELx_EC_SVC64]      = handle_svc,
        [ESR_ELx_EC_SYS64]      = kvm_handle_sys_reg,
        [ESR_ELx_EC_SVE]        = handle_sve,
        [ESR_ELx_EC_ERET]       = kvm_handle_eret,
index 34f222a..9cfe6bd 100644 (file)
@@ -70,20 +70,26 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
        }
 }
 
-static inline bool __hfgxtr_traps_required(void)
-{
-       if (cpus_have_final_cap(ARM64_SME))
-               return true;
-
-       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-               return true;
+#define compute_clr_set(vcpu, reg, clr, set)                           \
+       do {                                                            \
+               u64 hfg;                                                \
+               hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0;  \
+               set |= hfg & __ ## reg ## _MASK;                        \
+               clr |= ~hfg & __ ## reg ## _nMASK;                      \
+       } while(0)
 
-       return false;
-}
 
-static inline void __activate_traps_hfgxtr(void)
+static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
+       struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
        u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+       u64 r_val, w_val;
+
+       if (!cpus_have_final_cap(ARM64_HAS_FGT))
+               return;
+
+       ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
+       ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
 
        if (cpus_have_final_cap(ARM64_SME)) {
                tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
@@ -98,26 +104,72 @@ static inline void __activate_traps_hfgxtr(void)
        if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
                w_set |= HFGxTR_EL2_TCR_EL1_MASK;
 
-       sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
-       sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+       if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+               compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
+               compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
+       }
+
+       /* The default is not to trap anything but ACCDATA_EL1 */
+       r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+       r_val |= r_set;
+       r_val &= ~r_clr;
+
+       w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+       w_val |= w_set;
+       w_val &= ~w_clr;
+
+       write_sysreg_s(r_val, SYS_HFGRTR_EL2);
+       write_sysreg_s(w_val, SYS_HFGWTR_EL2);
+
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               return;
+
+       ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
+
+       r_set = r_clr = 0;
+       compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
+       r_val = __HFGITR_EL2_nMASK;
+       r_val |= r_set;
+       r_val &= ~r_clr;
+
+       write_sysreg_s(r_val, SYS_HFGITR_EL2);
+
+       ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
+       ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
+
+       r_clr = r_set = w_clr = w_set = 0;
+
+       compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
+       compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
+
+       r_val = __HDFGRTR_EL2_nMASK;
+       r_val |= r_set;
+       r_val &= ~r_clr;
+
+       w_val = __HDFGWTR_EL2_nMASK;
+       w_val |= w_set;
+       w_val &= ~w_clr;
+
+       write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
+       write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
 }
 
-static inline void __deactivate_traps_hfgxtr(void)
+static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
-       u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+       struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
 
-       if (cpus_have_final_cap(ARM64_SME)) {
-               tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
+       if (!cpus_have_final_cap(ARM64_HAS_FGT))
+               return;
 
-               r_set |= tmp;
-               w_set |= tmp;
-       }
+       write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
 
-       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-               w_clr |= HFGxTR_EL2_TCR_EL1_MASK;
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               return;
 
-       sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
-       sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
 }
 
 static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -145,8 +197,21 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
        vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 
-       if (__hfgxtr_traps_required())
-               __activate_traps_hfgxtr();
+       if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+               u64 hcrx = HCRX_GUEST_FLAGS;
+               if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+                       u64 clr = 0, set = 0;
+
+                       compute_clr_set(vcpu, HCRX_EL2, clr, set);
+
+                       hcrx |= set;
+                       hcrx &= ~clr;
+               }
+
+               write_sysreg_s(hcrx, SYS_HCRX_EL2);
+       }
+
+       __activate_traps_hfgxtr(vcpu);
 }
 
 static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -162,8 +227,10 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
                vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
        }
 
-       if (__hfgxtr_traps_required())
-               __deactivate_traps_hfgxtr();
+       if (cpus_have_final_cap(ARM64_HAS_HCX))
+               write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
+
+       __deactivate_traps_hfgxtr(vcpu);
 }
 
 static inline void ___activate_traps(struct kvm_vcpu *vcpu)
@@ -177,9 +244,6 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu)
 
        if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
                write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
-       if (cpus_have_final_cap(ARM64_HAS_HCX))
-               write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
@@ -194,9 +258,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
                vcpu->arch.hcr_el2 &= ~HCR_VSE;
                vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
        }
-
-       if (cpus_have_final_cap(ARM64_HAS_HCX))
-               write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
index d5ec972..230e4f2 100644 (file)
@@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
 int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
                                  enum kvm_pgtable_prot prot,
                                  unsigned long *haddr);
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
 
 #endif /* __KVM_HYP_MM_H */
index a169c61..857d9bc 100644 (file)
@@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
        __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
 }
 
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+       DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+       DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+       __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
        DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
        HANDLE_FUNC(__kvm_tlb_flush_vmid),
+       HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
        HANDLE_FUNC(__kvm_flush_cpu_context),
        HANDLE_FUNC(__kvm_timer_set_cntvoff),
        HANDLE_FUNC(__vgic_v3_read_vmcr),
index 318298e..65a7a18 100644 (file)
@@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
        return err;
 }
 
+static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
+{
+       unsigned long cur;
+
+       hyp_assert_lock_held(&pkvm_pgd_lock);
+
+       if (!start || start < __io_map_base)
+               return -EINVAL;
+
+       /* The allocated size is always a multiple of PAGE_SIZE */
+       cur = start + PAGE_ALIGN(size);
+
+       /* Are we overflowing on the vmemmap ? */
+       if (cur > __hyp_vmemmap)
+               return -ENOMEM;
+
+       __io_map_base = cur;
+
+       return 0;
+}
+
 /**
  * pkvm_alloc_private_va_range - Allocates a private VA range.
  * @size:      The size of the VA range to reserve.
@@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
  */
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
 {
-       unsigned long base, addr;
-       int ret = 0;
+       unsigned long addr;
+       int ret;
 
        hyp_spin_lock(&pkvm_pgd_lock);
-
-       /* Align the allocation based on the order of its size */
-       addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
-
-       /* The allocated size is always a multiple of PAGE_SIZE */
-       base = addr + PAGE_ALIGN(size);
-
-       /* Are we overflowing on the vmemmap ? */
-       if (!addr || base > __hyp_vmemmap)
-               ret = -ENOMEM;
-       else {
-               __io_map_base = base;
-               *haddr = addr;
-       }
-
+       addr = __io_map_base;
+       ret = __pkvm_alloc_private_va_range(addr, size);
        hyp_spin_unlock(&pkvm_pgd_lock);
 
+       *haddr = addr;
+
        return ret;
 }
 
@@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
        return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
 }
 
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
+{
+       unsigned long addr, prev_base;
+       size_t size;
+       int ret;
+
+       hyp_spin_lock(&pkvm_pgd_lock);
+
+       prev_base = __io_map_base;
+       /*
+        * Efficient stack verification using the PAGE_SHIFT bit implies
+        * an alignment of our allocation on the order of the size.
+        */
+       size = PAGE_SIZE * 2;
+       addr = ALIGN(__io_map_base, size);
+
+       ret = __pkvm_alloc_private_va_range(addr, size);
+       if (!ret) {
+               /*
+                * Since the stack grows downwards, map the stack to the page
+                * at the higher address and leave the lower guard page
+                * unbacked.
+                *
+                * Any valid stack address now has the PAGE_SHIFT bit as 1
+                * and addresses corresponding to the guard page have the
+                * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+                */
+               ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
+                                         PAGE_SIZE, phys, PAGE_HYP);
+               if (ret)
+                       __io_map_base = prev_base;
+       }
+       hyp_spin_unlock(&pkvm_pgd_lock);
+
+       *haddr = addr + size;
+
+       return ret;
+}
+
 static void *admit_host_page(void *arg)
 {
        struct kvm_hyp_memcache *host_mc = arg;
index bb98630..0d5e0a8 100644 (file)
@@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 
        for (i = 0; i < hyp_nr_cpus; i++) {
                struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
-               unsigned long hyp_addr;
 
                start = (void *)kern_hyp_va(per_cpu_base[i]);
                end = start + PAGE_ALIGN(hyp_percpu_size);
@@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
                if (ret)
                        return ret;
 
-               /*
-                * Allocate a contiguous HYP private VA range for the stack
-                * and guard page. The allocation is also aligned based on
-                * the order of its size.
-                */
-               ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+               ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
                if (ret)
                        return ret;
-
-               /*
-                * Since the stack grows downwards, map the stack to the page
-                * at the higher address and leave the lower guard page
-                * unbacked.
-                *
-                * Any valid stack address now has the PAGE_SHIFT bit as 1
-                * and addresses corresponding to the guard page have the
-                * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-                */
-               hyp_spin_lock(&pkvm_pgd_lock);
-               ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
-                                       PAGE_SIZE, params->stack_pa, PAGE_HYP);
-               hyp_spin_unlock(&pkvm_pgd_lock);
-               if (ret)
-                       return ret;
-
-               /* Update stack_hyp_va to end of the stack's private VA range */
-               params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
        }
 
        /*
index e89a231..c353a06 100644 (file)
@@ -236,7 +236,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
                 * KVM_ARM_VCPU_INIT, however, this is likely not possible for
                 * protected VMs.
                 */
-               vcpu->arch.target = -1;
+               vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
                *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
                *exit_code |= ARM_EXCEPTION_IL;
        }
index b9991bb..1b26571 100644 (file)
@@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t start, unsigned long pages)
+{
+       struct tlb_inv_context cxt;
+       unsigned long stride;
+
+       /*
+        * Since the range of addresses may not be mapped at
+        * the same level, assume the worst case as PAGE_SIZE
+        */
+       stride = PAGE_SIZE;
+       start = round_down(start, stride);
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt, false);
+
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+       dsb(ish);
+       __tlbi(vmalle1is);
+       dsb(ish);
+       isb();
+
+       /* See the comment in __kvm_tlb_flush_vmid_ipa() */
+       if (icache_is_vpipt())
+               icache_inval_all_pou();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
index f7a93ef..f155b8c 100644 (file)
@@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
        return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
 }
 
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t addr, size_t size)
+{
+       unsigned long pages, inval_pages;
+
+       if (!system_supports_tlb_range()) {
+               kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+               return;
+       }
+
+       pages = size >> PAGE_SHIFT;
+       while (pages > 0) {
+               inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
+               kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
+
+               addr += inval_pages << PAGE_SHIFT;
+               pages -= inval_pages;
+       }
+}
+
 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
 
 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
                 * evicted pte value (if any).
                 */
                if (kvm_pte_table(ctx->old, ctx->level))
-                       kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+                       kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+                                               kvm_granule_size(ctx->level));
                else if (kvm_pte_valid(ctx->old))
                        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
                                     ctx->addr, ctx->level);
@@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
        smp_store_release(ctx->ptep, new);
 }
 
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
-                          struct kvm_pgtable_mm_ops *mm_ops)
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
+{
+       /*
+        * If FEAT_TLBIRANGE is implemented, defer the individual
+        * TLB invalidations until the entire walk is finished, and
+        * then use the range-based TLBI instructions to do the
+        * invalidations. Condition deferred TLB invalidation on the
+        * system supporting FWB as the optimization is entirely
+        * pointless when the unmap walker needs to perform CMOs.
+        */
+       return system_supports_tlb_range() && stage2_has_fwb(pgt);
+}
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+                               struct kvm_s2_mmu *mmu,
+                               struct kvm_pgtable_mm_ops *mm_ops)
 {
+       struct kvm_pgtable *pgt = ctx->arg;
+
        /*
-        * Clear the existing PTE, and perform break-before-make with
-        * TLB maintenance if it was valid.
+        * Clear the existing PTE, and perform break-before-make if it was
+        * valid. Depending on the system support, defer the TLB maintenance
+        * for it until the entire unmap walk is completed.
         */
        if (kvm_pte_valid(ctx->old)) {
                kvm_clear_pte(ctx->ptep);
-               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+               if (!stage2_unmap_defer_tlb_flush(pgt))
+                       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+                                       ctx->addr, ctx->level);
        }
 
        mm_ops->put_page(ctx->ptep);
@@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
         * block entry and rely on the remaining portions being faulted
         * back lazily.
         */
-       stage2_put_pte(ctx, mmu, mm_ops);
+       stage2_unmap_put_pte(ctx, mmu, mm_ops);
 
        if (need_flush && mm_ops->dcache_clean_inval_poc)
                mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
+       int ret;
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_unmap_walker,
                .arg    = pgt,
                .flags  = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
        };
 
-       return kvm_pgtable_walk(pgt, addr, size, &walker);
+       ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+       if (stage2_unmap_defer_tlb_flush(pgt))
+               /* Perform the deferred TLB invalidations */
+               kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+       return ret;
 }
 
 struct stage2_attr_data {
index e69da55..46bd43f 100644 (file)
@@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t start, unsigned long pages)
+{
+       struct tlb_inv_context cxt;
+       unsigned long stride;
+
+       /*
+        * Since the range of addresses may not be mapped at
+        * the same level, assume the worst case as PAGE_SIZE
+        */
+       stride = PAGE_SIZE;
+       start = round_down(start, stride);
+
+       dsb(ishst);
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt);
+
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+       dsb(ish);
+       __tlbi(vmalle1is);
+       dsb(ish);
+       isb();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
index d3b4fee..587a104 100644 (file)
@@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 }
 
 /**
- * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
  * @kvm:       pointer to kvm structure.
  *
  * Interface to HYP function to flush all VM TLB entries
  */
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
-       ++kvm->stat.generic.remote_tlb_flush_requests;
        kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+       return 0;
+}
+
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+                                     gfn_t gfn, u64 nr_pages)
+{
+       kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
+                               gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+       return 0;
 }
 
 static bool kvm_is_device_pfn(unsigned long pfn)
@@ -592,6 +600,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
        return 0;
 }
 
+static int __hyp_alloc_private_va_range(unsigned long base)
+{
+       lockdep_assert_held(&kvm_hyp_pgd_mutex);
+
+       if (!PAGE_ALIGNED(base))
+               return -EINVAL;
+
+       /*
+        * Verify that BIT(VA_BITS - 1) hasn't been flipped by
+        * allocating the new area, as it would indicate we've
+        * overflowed the idmap/IO address range.
+        */
+       if ((base ^ io_map_base) & BIT(VA_BITS - 1))
+               return -ENOMEM;
+
+       io_map_base = base;
+
+       return 0;
+}
 
 /**
  * hyp_alloc_private_va_range - Allocates a private VA range.
@@ -612,26 +639,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
 
        /*
         * This assumes that we have enough space below the idmap
-        * page to allocate our VAs. If not, the check below will
-        * kick. A potential alternative would be to detect that
-        * overflow and switch to an allocation above the idmap.
+        * page to allocate our VAs. If not, the check in
+        * __hyp_alloc_private_va_range() will kick. A potential
+        * alternative would be to detect that overflow and switch
+        * to an allocation above the idmap.
         *
         * The allocated size is always a multiple of PAGE_SIZE.
         */
-       base = io_map_base - PAGE_ALIGN(size);
-
-       /* Align the allocation based on the order of its size */
-       base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
-
-       /*
-        * Verify that BIT(VA_BITS - 1) hasn't been flipped by
-        * allocating the new area, as it would indicate we've
-        * overflowed the idmap/IO address range.
-        */
-       if ((base ^ io_map_base) & BIT(VA_BITS - 1))
-               ret = -ENOMEM;
-       else
-               *haddr = io_map_base = base;
+       size = PAGE_ALIGN(size);
+       base = io_map_base - size;
+       ret = __hyp_alloc_private_va_range(base);
 
        mutex_unlock(&kvm_hyp_pgd_mutex);
 
@@ -668,6 +685,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
        return ret;
 }
 
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
+{
+       unsigned long base;
+       size_t size;
+       int ret;
+
+       mutex_lock(&kvm_hyp_pgd_mutex);
+       /*
+        * Efficient stack verification using the PAGE_SHIFT bit implies
+        * an alignment of our allocation on the order of the size.
+        */
+       size = PAGE_SIZE * 2;
+       base = ALIGN_DOWN(io_map_base - size, size);
+
+       ret = __hyp_alloc_private_va_range(base);
+
+       mutex_unlock(&kvm_hyp_pgd_mutex);
+
+       if (ret) {
+               kvm_err("Cannot allocate hyp stack guard page\n");
+               return ret;
+       }
+
+       /*
+        * Since the stack grows downwards, map the stack to the page
+        * at the higher address and leave the lower guard page
+        * unbacked.
+        *
+        * Any valid stack address now has the PAGE_SHIFT bit as 1
+        * and addresses corresponding to the guard page have the
+        * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+        */
+       ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
+                                   PAGE_HYP);
+       if (ret)
+               kvm_err("Cannot map hyp stack\n");
+
+       *haddr = base + size;
+
+       return ret;
+}
+
 /**
  * create_hyp_io_mappings - Map IO into both kernel and HYP
  * @phys_addr: The physical start address which gets mapped
@@ -1075,7 +1134,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
        write_lock(&kvm->mmu_lock);
        stage2_wp_range(&kvm->arch.mmu, start, end);
        write_unlock(&kvm->mmu_lock);
-       kvm_flush_remote_tlbs(kvm);
+       kvm_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 /**
@@ -1541,7 +1600,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 out_unlock:
        read_unlock(&kvm->mmu_lock);
-       kvm_set_pfn_accessed(pfn);
        kvm_release_pfn_clean(pfn);
        return ret != -EAGAIN ? ret : 0;
 }
@@ -1721,7 +1779,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-       kvm_pfn_t pfn = pte_pfn(range->pte);
+       kvm_pfn_t pfn = pte_pfn(range->arg.pte);
 
        if (!kvm->arch.mmu.pgt)
                return false;
index 315354d..042695a 100644 (file)
@@ -71,8 +71,9 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
                break;
 
        case SYS_ID_AA64MMFR0_EL1:
-               /* Hide ECV, FGT, ExS, Secure Memory */
-               val &= ~(GENMASK_ULL(63, 43)            |
+               /* Hide ECV, ExS, Secure Memory */
+               val &= ~(NV_FTR(MMFR0, ECV)             |
+                        NV_FTR(MMFR0, EXS)             |
                         NV_FTR(MMFR0, TGRAN4_2)        |
                         NV_FTR(MMFR0, TGRAN16_2)       |
                         NV_FTR(MMFR0, TGRAN64_2)       |
@@ -116,7 +117,8 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
                break;
 
        case SYS_ID_AA64MMFR1_EL1:
-               val &= (NV_FTR(MMFR1, PAN)      |
+               val &= (NV_FTR(MMFR1, HCX)      |
+                       NV_FTR(MMFR1, PAN)      |
                        NV_FTR(MMFR1, LO)       |
                        NV_FTR(MMFR1, HPDS)     |
                        NV_FTR(MMFR1, VH)       |
@@ -124,8 +126,7 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
                break;
 
        case SYS_ID_AA64MMFR2_EL1:
-               val &= ~(NV_FTR(MMFR2, EVT)     |
-                        NV_FTR(MMFR2, BBM)     |
+               val &= ~(NV_FTR(MMFR2, BBM)     |
                         NV_FTR(MMFR2, TTL)     |
                         GENMASK_ULL(47, 44)    |
                         NV_FTR(MMFR2, ST)      |
index 5606509..6b066e0 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/kvm_emulate.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_vgic.h>
+#include <asm/arm_pmuv3.h>
 
 #define PERF_ATTR_CFG1_COUNTER_64BIT   BIT(0)
 
@@ -35,12 +36,8 @@ static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
        return &vcpu->arch.pmu.pmc[cnt_idx];
 }
 
-static u32 kvm_pmu_event_mask(struct kvm *kvm)
+static u32 __kvm_pmu_event_mask(unsigned int pmuver)
 {
-       unsigned int pmuver;
-
-       pmuver = kvm->arch.arm_pmu->pmuver;
-
        switch (pmuver) {
        case ID_AA64DFR0_EL1_PMUVer_IMP:
                return GENMASK(9, 0);
@@ -55,6 +52,14 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
        }
 }
 
+static u32 kvm_pmu_event_mask(struct kvm *kvm)
+{
+       u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1);
+       u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);
+
+       return __kvm_pmu_event_mask(pmuver);
+}
+
 /**
  * kvm_pmc_is_64bit - determine if counter is 64bit
  * @pmc: counter context
@@ -672,8 +677,11 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
 {
        struct arm_pmu_entry *entry;
 
-       if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
-           pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+       /*
+        * Check the sanitised PMU version for the system, as KVM does not
+        * support implementations where PMUv3 exists on a subset of CPUs.
+        */
+       if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
                return;
 
        mutex_lock(&arm_pmus_lock);
@@ -750,11 +758,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
        } else {
                val = read_sysreg(pmceid1_el0);
                /*
-                * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
+                * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
                 * as RAZ
                 */
-               if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
-                       val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
+               val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
+                        BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
+                        BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
                base = 32;
        }
 
@@ -950,11 +959,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
                return 0;
        }
        case KVM_ARM_VCPU_PMU_V3_FILTER: {
+               u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
                struct kvm_pmu_event_filter __user *uaddr;
                struct kvm_pmu_event_filter filter;
                int nr_events;
 
-               nr_events = kvm_pmu_event_mask(kvm) + 1;
+               /*
+                * Allow userspace to specify an event filter for the entire
+                * event range supported by PMUVer of the hardware, rather
+                * than the guest's PMUVer for KVM backward compatibility.
+                */
+               nr_events = __kvm_pmu_event_mask(pmuver) + 1;
 
                uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
 
index 121f1a1..0eea225 100644 (file)
@@ -236,3 +236,21 @@ bool kvm_set_pmuserenr(u64 val)
        ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
        return true;
 }
+
+/*
+ * If we interrupted the guest to update the host PMU context, make
+ * sure we re-apply the guest EL0 state.
+ */
+void kvm_vcpu_pmu_resync_el0(void)
+{
+       struct kvm_vcpu *vcpu;
+
+       if (!has_vhe() || !in_interrupt())
+               return;
+
+       vcpu = kvm_get_running_vcpu();
+       if (!vcpu)
+               return;
+
+       kvm_make_request(KVM_REQ_RESYNC_PMU_EL0, vcpu);
+}
index bc8556b..7a65a35 100644 (file)
@@ -248,21 +248,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
                }
        }
 
-       switch (vcpu->arch.target) {
-       default:
-               if (vcpu_el1_is_32bit(vcpu)) {
-                       pstate = VCPU_RESET_PSTATE_SVC;
-               } else if (vcpu_has_nv(vcpu)) {
-                       pstate = VCPU_RESET_PSTATE_EL2;
-               } else {
-                       pstate = VCPU_RESET_PSTATE_EL1;
-               }
-
-               if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-               break;
+       if (vcpu_el1_is_32bit(vcpu))
+               pstate = VCPU_RESET_PSTATE_SVC;
+       else if (vcpu_has_nv(vcpu))
+               pstate = VCPU_RESET_PSTATE_EL2;
+       else
+               pstate = VCPU_RESET_PSTATE_EL1;
+
+       if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
+               ret = -EINVAL;
+               goto out;
        }
 
        /* Reset core registers */
index 2ca2973..e92ec81 100644 (file)
@@ -2151,6 +2151,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
        { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
 
+       { SYS_DESC(SYS_ACCDATA_EL1), undef_access },
+
        { SYS_DESC(SYS_SCXTNUM_EL1), undef_access },
 
        { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
@@ -2365,8 +2367,13 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
        EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
        EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
        EL2_REG(HACR_EL2, access_rw, reset_val, 0),
 
+       EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
+
        EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
        EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
        EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
@@ -2374,6 +2381,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
 
        { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+       EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
        EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
        EL2_REG(ELR_EL2, access_rw, reset_val, 0),
        { SYS_DESC(SYS_SP_EL1), access_sp_el1},
@@ -3170,6 +3179,9 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 
        trace_kvm_handle_sys_reg(esr);
 
+       if (__check_nv_sr_forward(vcpu))
+               return 1;
+
        params = esr_sys64_to_params(esr);
        params.regval = vcpu_get_reg(vcpu, Rt);
 
@@ -3587,5 +3599,8 @@ int __init kvm_sys_reg_table_init(void)
        if (!first_idreg)
                return -EINVAL;
 
+       if (kvm_get_mode() == KVM_MODE_NV)
+               return populate_nv_trap_config();
+
        return 0;
 }
index 6ce5c02..8ad5310 100644 (file)
@@ -364,6 +364,32 @@ TRACE_EVENT(kvm_inject_nested_exception,
                  __entry->hcr_el2)
 );
 
+TRACE_EVENT(kvm_forward_sysreg_trap,
+           TP_PROTO(struct kvm_vcpu *vcpu, u32 sysreg, bool is_read),
+           TP_ARGS(vcpu, sysreg, is_read),
+
+           TP_STRUCT__entry(
+               __field(u64,    pc)
+               __field(u32,    sysreg)
+               __field(bool,   is_read)
+           ),
+
+           TP_fast_assign(
+               __entry->pc = *vcpu_pc(vcpu);
+               __entry->sysreg = sysreg;
+               __entry->is_read = is_read;
+           ),
+
+           TP_printk("%llx %c (%d,%d,%d,%d,%d)",
+                     __entry->pc,
+                     __entry->is_read ? 'R' : 'W',
+                     sys_reg_Op0(__entry->sysreg),
+                     sys_reg_Op1(__entry->sysreg),
+                     sys_reg_CRn(__entry->sysreg),
+                     sys_reg_CRm(__entry->sysreg),
+                     sys_reg_Op2(__entry->sysreg))
+);
+
 #endif /* _TRACE_ARM_ARM64_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
index f9923be..0ab09b0 100644 (file)
@@ -199,7 +199,6 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
                         int offset, u32 *val);
@@ -233,7 +232,6 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_enable(struct kvm_vcpu *vcpu);
index 78b87a6..2432683 100644 (file)
@@ -24,7 +24,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
        const u64 *ptr;
        u64 data, sum64 = 0;
 
-       if (unlikely(len == 0))
+       if (unlikely(len <= 0))
                return 0;
 
        offset = (unsigned long)buff & 7;
index c80ed4f..c3f06fd 100644 (file)
@@ -26,6 +26,7 @@ HAS_ECV
 HAS_ECV_CNTPOFF
 HAS_EPAN
 HAS_EVT
+HAS_FGT
 HAS_GENERIC_AUTH
 HAS_GENERIC_AUTH_ARCH_QARMA3
 HAS_GENERIC_AUTH_ARCH_QARMA5
index 65866bf..2517ef7 100644 (file)
@@ -2156,6 +2156,135 @@ Field   1       ICIALLU
 Field  0       ICIALLUIS
 EndSysreg
 
+Sysreg HDFGRTR_EL2     3       4       3       1       4
+Field  63      PMBIDR_EL1
+Field  62      nPMSNEVFR_EL1
+Field  61      nBRBDATA
+Field  60      nBRBCTL
+Field  59      nBRBIDR
+Field  58      PMCEIDn_EL0
+Field  57      PMUSERENR_EL0
+Field  56      TRBTRG_EL1
+Field  55      TRBSR_EL1
+Field  54      TRBPTR_EL1
+Field  53      TRBMAR_EL1
+Field  52      TRBLIMITR_EL1
+Field  51      TRBIDR_EL1
+Field  50      TRBBASER_EL1
+Res0   49
+Field  48      TRCVICTLR
+Field  47      TRCSTATR
+Field  46      TRCSSCSRn
+Field  45      TRCSEQSTR
+Field  44      TRCPRGCTLR
+Field  43      TRCOSLSR
+Res0   42
+Field  41      TRCIMSPECn
+Field  40      TRCID
+Res0   39:38
+Field  37      TRCCNTVRn
+Field  36      TRCCLAIM
+Field  35      TRCAUXCTLR
+Field  34      TRCAUTHSTATUS
+Field  33      TRC
+Field  32      PMSLATFR_EL1
+Field  31      PMSIRR_EL1
+Field  30      PMSIDR_EL1
+Field  29      PMSICR_EL1
+Field  28      PMSFCR_EL1
+Field  27      PMSEVFR_EL1
+Field  26      PMSCR_EL1
+Field  25      PMBSR_EL1
+Field  24      PMBPTR_EL1
+Field  23      PMBLIMITR_EL1
+Field  22      PMMIR_EL1
+Res0   21:20
+Field  19      PMSELR_EL0
+Field  18      PMOVS
+Field  17      PMINTEN
+Field  16      PMCNTEN
+Field  15      PMCCNTR_EL0
+Field  14      PMCCFILTR_EL0
+Field  13      PMEVTYPERn_EL0
+Field  12      PMEVCNTRn_EL0
+Field  11      OSDLR_EL1
+Field  10      OSECCR_EL1
+Field  9       OSLSR_EL1
+Res0   8
+Field  7       DBGPRCR_EL1
+Field  6       DBGAUTHSTATUS_EL1
+Field  5       DBGCLAIM
+Field  4       MDSCR_EL1
+Field  3       DBGWVRn_EL1
+Field  2       DBGWCRn_EL1
+Field  1       DBGBVRn_EL1
+Field  0       DBGBCRn_EL1
+EndSysreg
+
+Sysreg HDFGWTR_EL2     3       4       3       1       5
+Res0   63
+Field  62      nPMSNEVFR_EL1
+Field  61      nBRBDATA
+Field  60      nBRBCTL
+Res0   59:58
+Field  57      PMUSERENR_EL0
+Field  56      TRBTRG_EL1
+Field  55      TRBSR_EL1
+Field  54      TRBPTR_EL1
+Field  53      TRBMAR_EL1
+Field  52      TRBLIMITR_EL1
+Res0   51
+Field  50      TRBBASER_EL1
+Field  49      TRFCR_EL1
+Field  48      TRCVICTLR
+Res0   47
+Field  46      TRCSSCSRn
+Field  45      TRCSEQSTR
+Field  44      TRCPRGCTLR
+Res0   43
+Field  42      TRCOSLAR
+Field  41      TRCIMSPECn
+Res0   40:38
+Field  37      TRCCNTVRn
+Field  36      TRCCLAIM
+Field  35      TRCAUXCTLR
+Res0   34
+Field  33      TRC
+Field  32      PMSLATFR_EL1
+Field  31      PMSIRR_EL1
+Res0   30
+Field  29      PMSICR_EL1
+Field  28      PMSFCR_EL1
+Field  27      PMSEVFR_EL1
+Field  26      PMSCR_EL1
+Field  25      PMBSR_EL1
+Field  24      PMBPTR_EL1
+Field  23      PMBLIMITR_EL1
+Res0   22
+Field  21      PMCR_EL0
+Field  20      PMSWINC_EL0
+Field  19      PMSELR_EL0
+Field  18      PMOVS
+Field  17      PMINTEN
+Field  16      PMCNTEN
+Field  15      PMCCNTR_EL0
+Field  14      PMCCFILTR_EL0
+Field  13      PMEVTYPERn_EL0
+Field  12      PMEVCNTRn_EL0
+Field  11      OSDLR_EL1
+Field  10      OSECCR_EL1
+Res0   9
+Field  8       OSLAR_EL1
+Field  7       DBGPRCR_EL1
+Res0   6
+Field  5       DBGCLAIM
+Field  4       MDSCR_EL1
+Field  3       DBGWVRn_EL1
+Field  2       DBGWCRn_EL1
+Field  1       DBGBVRn_EL1
+Field  0       DBGBCRn_EL1
+EndSysreg
+
 Sysreg ZCR_EL2 3       4       1       2       0
 Fields ZCR_ELx
 EndSysreg
index 3373324..aefae2e 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += agp.h
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += vtime.h
index 5eba3fb..ac06d44 100644 (file)
@@ -37,7 +37,7 @@
  *     pNonSys:        !pSys
  */
 
-
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/asmmacro.h>
 #include <asm/cache.h>
@@ -49,7 +49,6 @@
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
-#include <asm/export.h>
 
 #include "minstate.h"
 
index 821e68d..9928c5b 100644 (file)
@@ -34,9 +34,9 @@
 #define PSR_BITS_TO_SET                                                        \
        (IA64_PSR_BN)
 
+#include <linux/export.h>
 #include <asm/processor.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 /*
  * Inputs:
index c096500..85c8a57 100644 (file)
@@ -20,7 +20,7 @@
  *   Support for CPU Hotplug
  */
 
-
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/asmmacro.h>
 #include <asm/fpu.h>
@@ -33,7 +33,6 @@
 #include <asm/mca_asm.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define SAL_PSR_BITS_TO_SET                            \
index 7a418e3..da90c49 100644 (file)
@@ -47,7 +47,7 @@
  * Table is based upon EAS2.6 (Oct 1999)
  */
 
-
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/asmmacro.h>
 #include <asm/break.h>
@@ -58,7 +58,6 @@
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 
 #if 0
 # define PSR_DEFAULT_BITS      psr.ac
index 06d01a0..fb6db69 100644 (file)
@@ -13,9 +13,9 @@
  * 05/24/2000 eranian Added support for physical mode static calls
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/processor.h>
-#include <asm/export.h>
 
        .data
 pal_entry_point:
index 65b7508..ba0dd25 100644 (file)
@@ -10,9 +10,9 @@
  * 3/08/02 davidm      Some more tweaking
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #ifdef CONFIG_ITANIUM
 # define L3_LINE_SIZE  64      // Itanium L3 line size
index a28f39d..1d9e45c 100644 (file)
@@ -12,8 +12,8 @@
  *     Stephane Eranian <eranian@hpl.hp.com>
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 //
 // arguments
index 176f857..c0a0e6b 100644 (file)
@@ -15,9 +15,9 @@
  *
  * 4/06/01 davidm      Tuned to make it perform well both for cached and uncached copies.
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #define PIPE_DEPTH     3
 #define EPI            p[PIPE_DEPTH-1]
index d6fd56e..5e8bb4b 100644 (file)
@@ -60,9 +60,9 @@
  *     to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
  *     an order that avoids bank conflicts.
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #define PREFETCH_DIST  8               // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
 
index f681556..8daab72 100644 (file)
@@ -30,8 +30,8 @@
  *     - fix extraneous stop bit introduced by the EX() macro.
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 //
 // Tuneable parameters
index 8573d59..f8e795f 100644 (file)
@@ -8,9 +8,8 @@
  * 05/28/05 Zoltan Menyhart    Dynamic stride size
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
-
 
        /*
         * flush_icache_range(start,end)
index def92b7..83586fb 100644 (file)
@@ -15,8 +15,8 @@
  * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 #ifdef MODULO
 # define OP    mod
index a8ba3bd..5c91136 100644 (file)
@@ -15,8 +15,8 @@
  * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 #ifdef MODULO
 # define OP    mod
index dc9e6e6..fcc0b81 100644 (file)
@@ -13,8 +13,8 @@
  * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 /*
  * Since we know that most likely this function is called with buf aligned
index 91a625f..35c9069 100644 (file)
@@ -14,8 +14,8 @@
  *     Stephane Eranian <eranian@hpl.hp.com>
  *     David Mosberger-Tang <davidm@hpl.hp.com>
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(memcpy)
 
index cc4e6ac..c0d4362 100644 (file)
@@ -14,9 +14,9 @@
  * Copyright (C) 2002 Intel Corp.
  * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #define EK(y...) EX(y)
 
index 07a8b92..552c5c7 100644 (file)
@@ -18,8 +18,8 @@
    Since a stf.spill f0 can store 16B in one go, we use this instruction
    to get peak speed when value = 0.  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 #undef ret
 
 #define dest           in0
index d66de59..1f4a46c 100644 (file)
@@ -17,8 +17,8 @@
  * 09/24/99 S.Eranian add speculation recovery code
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 //
 //
index 49eb81b..a287169 100644 (file)
@@ -17,8 +17,8 @@
  *                      by Andreas Schwab <schwab@suse.de>).
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(__strncpy_from_user)
        alloc r2=ar.pfs,3,0,0,0
index 4b684d4..a7eb56e 100644 (file)
@@ -13,8 +13,8 @@
  * Copyright (C) 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(__strnlen_user)
        .prologue
index 5413daf..6e2a696 100644 (file)
@@ -5,8 +5,8 @@
  * Optimized RAID-5 checksumming functions for IA-64.
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(xor_ia64_2)
        .prologue
index ecf282d..e14396a 100644 (file)
@@ -8,11 +8,13 @@ config LOONGARCH
        select ACPI_PPTT if ACPI
        select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
        select ARCH_BINFMT_ELF_STATE
+       select ARCH_DISABLE_KASAN_INLINE
        select ARCH_ENABLE_MEMORY_HOTPLUG
        select ARCH_ENABLE_MEMORY_HOTREMOVE
        select ARCH_HAS_ACPI_TABLE_UPGRADE      if ACPI
        select ARCH_HAS_CPU_FINALIZE_INIT
        select ARCH_HAS_FORTIFY_SOURCE
+       select ARCH_HAS_KCOV
        select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
        select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
        select ARCH_HAS_PTE_SPECIAL
@@ -91,6 +93,9 @@ config LOONGARCH
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_JUMP_LABEL_RELATIVE
+       select HAVE_ARCH_KASAN
+       select HAVE_ARCH_KFENCE
+       select HAVE_ARCH_KGDB if PERF_EVENTS
        select HAVE_ARCH_MMAP_RND_BITS if MMU
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
@@ -115,6 +120,7 @@ config LOONGARCH
        select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
+       select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO
        select HAVE_HW_BREAKPOINT if PERF_EVENTS
        select HAVE_IOREMAP_PROT
@@ -254,6 +260,9 @@ config AS_HAS_LSX_EXTENSION
 config AS_HAS_LASX_EXTENSION
        def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
 
+config AS_HAS_LBT_EXTENSION
+       def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0)
+
 menu "Kernel type and options"
 
 source "kernel/Kconfig.hz"
@@ -534,6 +543,18 @@ config CPU_HAS_LASX
 
          If unsure, say Y.
 
+config CPU_HAS_LBT
+       bool "Support for the Loongson Binary Translation Extension"
+       depends on AS_HAS_LBT_EXTENSION
+       help
+         Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0
+         to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop).
+         Enabling this option allows the kernel to allocate and switch registers
+         specific to LBT.
+
+         If you want to use this feature, such as the Loongson Architecture
+         Translator (LAT), say Y.
+
 config CPU_HAS_PREFETCH
        bool
        default y
@@ -638,6 +659,11 @@ config ARCH_MMAP_RND_BITS_MAX
 config ARCH_SUPPORTS_UPROBES
        def_bool y
 
+config KASAN_SHADOW_OFFSET
+       hex
+       default 0x0
+       depends on KASAN
+
 menu "Power management options"
 
 config ARCH_SUSPEND_POSSIBLE
index ef87bab..fb0fada 100644 (file)
@@ -84,7 +84,10 @@ LDFLAGS_vmlinux                      += -static -pie --no-dynamic-linker -z notext
 endif
 
 cflags-y += $(call cc-option, -mno-check-zero-division)
+
+ifndef CONFIG_KASAN
 cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
+endif
 
 load-y         = 0x9000000000200000
 bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
index d64849b..a3b52aa 100644 (file)
@@ -30,7 +30,6 @@ CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
-CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
@@ -47,8 +46,12 @@ CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
+CONFIG_CPU_HAS_FPU=y
+CONFIG_CPU_HAS_LSX=y
+CONFIG_CPU_HAS_LASX=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+CONFIG_RANDOMIZE_BASE=y
 CONFIG_SUSPEND=y
 CONFIG_HIBERNATION=y
 CONFIG_ACPI=y
@@ -63,6 +66,7 @@ CONFIG_EFI_ZBOOT=y
 CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
 CONFIG_EFI_CAPSULE_LOADER=m
 CONFIG_EFI_TEST=m
+CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
@@ -108,7 +112,12 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
 CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
 CONFIG_INET_ESP=m
 CONFIG_INET_UDP_DIAG=y
 CONFIG_TCP_CONG_ADVANCED=y
@@ -137,7 +146,6 @@ CONFIG_NFT_MASQ=m
 CONFIG_NFT_REDIR=m
 CONFIG_NFT_NAT=m
 CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_QUOTA=m
 CONFIG_NFT_REJECT=m
@@ -208,7 +216,11 @@ CONFIG_IP_VS=m
 CONFIG_IP_VS_IPV6=y
 CONFIG_IP_VS_PROTO_TCP=y
 CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
 CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
 CONFIG_IP_VS_NFCT=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_DUP_IPV4=m
@@ -227,7 +239,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
 CONFIG_IP_NF_TARGET_REDIRECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
@@ -363,6 +374,8 @@ CONFIG_MTD_CFI_AMDSTD=m
 CONFIG_MTD_CFI_STAA=m
 CONFIG_MTD_RAM=m
 CONFIG_MTD_ROM=m
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_BLOCK=y
 CONFIG_PARPORT=y
 CONFIG_PARPORT_PC=y
 CONFIG_PARPORT_SERIAL=y
@@ -370,6 +383,7 @@ CONFIG_PARPORT_PC_FIFO=y
 CONFIG_ZRAM=m
 CONFIG_ZRAM_DEF_COMP_ZSTD=y
 CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -516,6 +530,8 @@ CONFIG_STMMAC_ETH=y
 # CONFIG_NET_VENDOR_TEHUTI is not set
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VIA is not set
+CONFIG_NGBE=y
+CONFIG_TXGBE=y
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
@@ -602,9 +618,15 @@ CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_PIIX4=y
 CONFIG_I2C_GPIO=y
+CONFIG_I2C_LS2X=y
 CONFIG_SPI=y
+CONFIG_SPI_LOONGSON_PCI=m
+CONFIG_SPI_LOONGSON_PLATFORM=m
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_LOONGSON2=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_LOONGSON=y
+CONFIG_GPIO_LOONGSON_64BIT=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_RESTART=y
 CONFIG_POWER_RESET_SYSCON=y
@@ -614,6 +636,7 @@ CONFIG_SENSORS_LM75=m
 CONFIG_SENSORS_LM93=m
 CONFIG_SENSORS_W83795=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_LOONGSON2_THERMAL=m
 CONFIG_RC_CORE=m
 CONFIG_LIRC=y
 CONFIG_RC_DECODERS=y
@@ -643,6 +666,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
 CONFIG_DRM_AST=y
 CONFIG_DRM_QXL=m
 CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_DRM_LOONGSON=y
 CONFIG_FB=y
 CONFIG_FB_EFI=y
 CONFIG_FB_RADEON=y
@@ -712,6 +736,7 @@ CONFIG_UCSI_ACPI=m
 CONFIG_INFINIBAND=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_LOONGSON=y
 CONFIG_DMADEVICES=y
 CONFIG_UIO=m
 CONFIG_UIO_PDRV_GENIRQ=m
@@ -745,7 +770,9 @@ CONFIG_COMEDI_NI_LABPC_PCI=m
 CONFIG_COMEDI_NI_PCIDIO=m
 CONFIG_COMEDI_NI_PCIMIO=m
 CONFIG_STAGING=y
-CONFIG_R8188EU=m
+CONFIG_COMMON_CLK_LOONGSON2=y
+CONFIG_LOONGSON2_GUTS=y
+CONFIG_LOONGSON2_PM=y
 CONFIG_PM_DEVFREQ=y
 CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_DEVFREQ_GOV_PERFORMANCE=y
@@ -759,10 +786,17 @@ CONFIG_EXT2_FS_SECURITY=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
 CONFIG_XFS_FS=y
 CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -771,11 +805,14 @@ CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=y
 CONFIG_OVERLAY_FS_INDEX=y
 CONFIG_OVERLAY_FS_XINO_AUTO=y
 CONFIG_OVERLAY_FS_METACOPY=y
 CONFIG_FSCACHE=y
+CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
@@ -784,19 +821,42 @@ CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_FAT_DEFAULT_CODEPAGE=936
 CONFIG_FAT_DEFAULT_IOCHARSET="gb2312"
+CONFIG_EXFAT_FS=m
+CONFIG_NTFS3_FS=m
+CONFIG_NTFS3_64BIT_CLUSTER=y
+CONFIG_NTFS3_LZX_XPRESS=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=y
+CONFIG_ORANGEFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_ECRYPT_FS_MESSAGING=y
 CONFIG_HFS_FS=m
 CONFIG_HFSPLUS_FS=m
+CONFIG_UBIFS_FS=m
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
 CONFIG_CRAMFS=m
 CONFIG_SQUASHFS=y
 CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZ4=y
 CONFIG_SQUASHFS_LZO=y
 CONFIG_SQUASHFS_XZ=y
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_PSTORE=m
+CONFIG_PSTORE_LZO_COMPRESS=m
+CONFIG_PSTORE_LZ4_COMPRESS=m
+CONFIG_PSTORE_LZ4HC_COMPRESS=m
+CONFIG_PSTORE_842_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_ZIP_LZMA=y
+CONFIG_EROFS_FS_PCPU_KTHREAD=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
@@ -807,6 +867,10 @@ CONFIG_NFSD=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_BLOCKLAYOUT=y
+CONFIG_CEPH_FS=m
+CONFIG_CEPH_FSCACHE=y
+CONFIG_CEPH_FS_POSIX_ACL=y
+CONFIG_CEPH_FS_SECURITY_LABEL=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_DEBUG is not set
 CONFIG_9P_FS=y
@@ -814,6 +878,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_936=y
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_UTF8=y
+CONFIG_DLM=m
 CONFIG_KEY_DH_OPERATIONS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_SELINUX=y
@@ -847,6 +912,7 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_CRC32_LOONGARCH=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_PRINTK_TIME=y
 CONFIG_STRIP_ASM_SYMS=y
index ed06d39..cf8e1a4 100644 (file)
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/uaccess.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/ftrace.h>
index 79e1d53..c9544f3 100644 (file)
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 
-       .macro  parse_v var val
-       \var    = \val
-       .endm
-
-       .macro  parse_r var r
-       \var    = -1
-       .ifc    \r, $r0
-       \var    = 0
-       .endif
-       .ifc    \r, $r1
-       \var    = 1
-       .endif
-       .ifc    \r, $r2
-       \var    = 2
-       .endif
-       .ifc    \r, $r3
-       \var    = 3
-       .endif
-       .ifc    \r, $r4
-       \var    = 4
-       .endif
-       .ifc    \r, $r5
-       \var    = 5
-       .endif
-       .ifc    \r, $r6
-       \var    = 6
-       .endif
-       .ifc    \r, $r7
-       \var    = 7
-       .endif
-       .ifc    \r, $r8
-       \var    = 8
-       .endif
-       .ifc    \r, $r9
-       \var    = 9
-       .endif
-       .ifc    \r, $r10
-       \var    = 10
-       .endif
-       .ifc    \r, $r11
-       \var    = 11
-       .endif
-       .ifc    \r, $r12
-       \var    = 12
-       .endif
-       .ifc    \r, $r13
-       \var    = 13
-       .endif
-       .ifc    \r, $r14
-       \var    = 14
-       .endif
-       .ifc    \r, $r15
-       \var    = 15
-       .endif
-       .ifc    \r, $r16
-       \var    = 16
-       .endif
-       .ifc    \r, $r17
-       \var    = 17
-       .endif
-       .ifc    \r, $r18
-       \var    = 18
-       .endif
-       .ifc    \r, $r19
-       \var    = 19
-       .endif
-       .ifc    \r, $r20
-       \var    = 20
-       .endif
-       .ifc    \r, $r21
-       \var    = 21
-       .endif
-       .ifc    \r, $r22
-       \var    = 22
-       .endif
-       .ifc    \r, $r23
-       \var    = 23
-       .endif
-       .ifc    \r, $r24
-       \var    = 24
-       .endif
-       .ifc    \r, $r25
-       \var    = 25
-       .endif
-       .ifc    \r, $r26
-       \var    = 26
-       .endif
-       .ifc    \r, $r27
-       \var    = 27
-       .endif
-       .ifc    \r, $r28
-       \var    = 28
-       .endif
-       .ifc    \r, $r29
-       \var    = 29
-       .endif
-       .ifc    \r, $r30
-       \var    = 30
-       .endif
-       .ifc    \r, $r31
-       \var    = 31
-       .endif
-       .iflt   \var
-       .error  "Unable to parse register name \r"
-       .endif
-       .endm
-
        .macro  cpu_save_nonscratch thread
        stptr.d s0, \thread, THREAD_REG23
        stptr.d s1, \thread, THREAD_REG24
 
        .macro fpu_save_csr thread tmp
        movfcsr2gr      \tmp, fcsr0
-       stptr.w \tmp, \thread, THREAD_FCSR
+       stptr.w         \tmp, \thread, THREAD_FCSR
+#ifdef CONFIG_CPU_HAS_LBT
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp, \tmp, FPU_CSR_TM
+       beqz            \tmp, 1f
+       /* Save FTOP */
+       x86mftop        \tmp
+       stptr.w         \tmp, \thread, THREAD_FTOP
+       /* Turn off TM to ensure the order of FPR in memory independent of TM */
+       x86clrtm
+1:
+#endif
        .endm
 
-       .macro fpu_restore_csr thread tmp
-       ldptr.w \tmp, \thread, THREAD_FCSR
-       movgr2fcsr      fcsr0, \tmp
+       .macro fpu_restore_csr thread tmp0 tmp1
+       ldptr.w         \tmp0, \thread, THREAD_FCSR
+       movgr2fcsr      fcsr0, \tmp0
+#ifdef CONFIG_CPU_HAS_LBT
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp0, \tmp0, FPU_CSR_TM
+       beqz            \tmp0, 2f
+       /* Restore FTOP */
+       ldptr.w         \tmp0, \thread, THREAD_FTOP
+       andi            \tmp0, \tmp0, 0x7
+       la.pcrel        \tmp1, 1f
+       alsl.d          \tmp1, \tmp0, \tmp1, 3
+       jr              \tmp1
+1:
+       x86mttop        0
+       b       2f
+       x86mttop        1
+       b       2f
+       x86mttop        2
+       b       2f
+       x86mttop        3
+       b       2f
+       x86mttop        4
+       b       2f
+       x86mttop        5
+       b       2f
+       x86mttop        6
+       b       2f
+       x86mttop        7
+2:
+#endif
        .endm
 
        .macro fpu_save_cc thread tmp0 tmp1
        .macro  lsx_restore_all thread tmp0 tmp1
        lsx_restore_data        \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
-       fpu_restore_csr         \thread, \tmp0
+       fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm
 
        .macro  lsx_save_upper vd base tmp off
        .macro  lasx_restore_all thread tmp0 tmp1
        lasx_restore_data       \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
-       fpu_restore_csr         \thread, \tmp0
+       fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm
 
        .macro  lasx_save_upper xd base tmp off
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
new file mode 100644 (file)
index 0000000..deeff81
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/mmzone.h>
+#include <asm/addrspace.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+#define __HAVE_ARCH_SHADOW_MAP
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
+#define XRANGE_SHIFT (48)
+
+/* Valid address length */
+#define XRANGE_SHADOW_SHIFT    (PGDIR_SHIFT + PAGE_SHIFT - 3)
+/* Used for taking out the valid address */
+#define XRANGE_SHADOW_MASK     GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
+/* Size of the whole address space of one segment */
+#define XRANGE_SIZE            (XRANGE_SHADOW_MASK + 1)
+
+/* 64-bit segment value. */
+#define XKPRANGE_UC_SEG                (0x8000)
+#define XKPRANGE_CC_SEG                (0x9000)
+#define XKVRANGE_VC_SEG                (0xffff)
+
+/* Cached */
+#define XKPRANGE_CC_START              CACHE_BASE
+#define XKPRANGE_CC_SIZE               XRANGE_SIZE
+#define XKPRANGE_CC_KASAN_OFFSET       (0)
+#define XKPRANGE_CC_SHADOW_SIZE                (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_CC_SHADOW_END         (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE)
+
+/* UnCached */
+#define XKPRANGE_UC_START              UNCACHE_BASE
+#define XKPRANGE_UC_SIZE               XRANGE_SIZE
+#define XKPRANGE_UC_KASAN_OFFSET       XKPRANGE_CC_SHADOW_END
+#define XKPRANGE_UC_SHADOW_SIZE                (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_UC_SHADOW_END         (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
+
+/* VMALLOC (Cached or UnCached)  */
+#define XKVRANGE_VC_START              MODULES_VADDR
+#define XKVRANGE_VC_SIZE               round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
+#define XKVRANGE_VC_KASAN_OFFSET       XKPRANGE_UC_SHADOW_END
+#define XKVRANGE_VC_SHADOW_SIZE                (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKVRANGE_VC_SHADOW_END         (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
+
+/* KASAN shadow memory starts right after vmalloc. */
+#define KASAN_SHADOW_START             round_up(KFENCE_AREA_END, PGDIR_SIZE)
+#define KASAN_SHADOW_SIZE              (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
+#define KASAN_SHADOW_END               round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
+
+#define XKPRANGE_CC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
+#define XKPRANGE_UC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
+#define XKVRANGE_VC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
+
+extern bool kasan_early_stage;
+extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
+
+#define kasan_arch_is_ready kasan_arch_is_ready
+static __always_inline bool kasan_arch_is_ready(void)
+{
+       return !kasan_early_stage;
+}
+
+static inline void *kasan_mem_to_shadow(const void *addr)
+{
+       if (!kasan_arch_is_ready()) {
+               return (void *)(kasan_early_shadow_page);
+       } else {
+               unsigned long maddr = (unsigned long)addr;
+               unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
+               unsigned long offset = 0;
+
+               maddr &= XRANGE_SHADOW_MASK;
+               switch (xrange) {
+               case XKPRANGE_CC_SEG:
+                       offset = XKPRANGE_CC_SHADOW_OFFSET;
+                       break;
+               case XKPRANGE_UC_SEG:
+                       offset = XKPRANGE_UC_SHADOW_OFFSET;
+                       break;
+               case XKVRANGE_VC_SEG:
+                       offset = XKVRANGE_VC_SHADOW_OFFSET;
+                       break;
+               default:
+                       WARN_ON(1);
+                       return NULL;
+               }
+
+               return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset);
+       }
+}
+
+static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
+{
+       unsigned long addr = (unsigned long)shadow_addr;
+
+       if (unlikely(addr > KASAN_SHADOW_END) ||
+               unlikely(addr < KASAN_SHADOW_START)) {
+               WARN_ON(1);
+               return NULL;
+       }
+
+       if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
+               return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
+       else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
+               return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
+       else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
+               return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START);
+       else {
+               WARN_ON(1);
+               return NULL;
+       }
+}
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#endif
+#endif
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
new file mode 100644 (file)
index 0000000..6c82aea
--- /dev/null
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KFENCE support for LoongArch.
+ *
+ * Author: Enze Li <lienze@kylinos.cn>
+ * Copyright (C) 2022-2023 KylinSoft Corporation.
+ */
+
+#ifndef _ASM_LOONGARCH_KFENCE_H
+#define _ASM_LOONGARCH_KFENCE_H
+
+#include <linux/kfence.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+static inline bool arch_kfence_init_pool(void)
+{
+       int err;
+       char *kfence_pool = __kfence_pool;
+       struct vm_struct *area;
+
+       area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
+                                   KFENCE_AREA_START, KFENCE_AREA_END,
+                                   __builtin_return_address(0));
+       if (!area)
+               return false;
+
+       __kfence_pool = (char *)area->addr;
+       err = ioremap_page_range((unsigned long)__kfence_pool,
+                                (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
+                                virt_to_phys((void *)kfence_pool), PAGE_KERNEL);
+       if (err) {
+               free_vm_area(area);
+               __kfence_pool = kfence_pool;
+               return false;
+       }
+
+       return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+       pte_t *pte = virt_to_kpte(addr);
+
+       if (WARN_ON(!pte) || pte_none(*pte))
+               return false;
+
+       if (protect)
+               set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
+       else
+               set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
+
+       preempt_disable();
+       local_flush_tlb_one(addr);
+       preempt_enable();
+
+       return true;
+}
+
+#endif /* _ASM_LOONGARCH_KFENCE_H */
diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h
new file mode 100644 (file)
index 0000000..2041ae5
--- /dev/null
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_KGDB_H
+#define _ASM_LOONGARCH_KGDB_H
+
+#define GDB_SIZEOF_REG         sizeof(u64)
+
+/* gdb remote protocol expects the following register layout. */
+
+/*
+ * General purpose registers:
+ *     r0-r31: 64 bit
+ *     orig_a0: 64 bit
+ *     pc : 64 bit
+ *     csr_badvaddr: 64 bit
+ */
+#define DBG_PT_REGS_BASE       0
+#define DBG_PT_REGS_NUM                35
+#define DBG_PT_REGS_END                (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1)
+
+/*
+ * Floating point registers:
+ *     f0-f31: 64 bit
+ */
+#define DBG_FPR_BASE           (DBG_PT_REGS_END + 1)
+#define DBG_FPR_NUM            32
+#define DBG_FPR_END            (DBG_FPR_BASE + DBG_FPR_NUM - 1)
+
+/*
+ * Condition Flag registers:
+ *     fcc0-fcc7: 8 bit
+ */
+#define DBG_FCC_BASE           (DBG_FPR_END + 1)
+#define DBG_FCC_NUM            8
+#define DBG_FCC_END            (DBG_FCC_BASE + DBG_FCC_NUM - 1)
+
+/*
+ * Floating-point Control and Status registers:
+ *     fcsr: 32 bit
+ */
+#define DBG_FCSR_NUM           1
+#define DBG_FCSR               (DBG_FCC_END + 1)
+
+#define DBG_MAX_REG_NUM                (DBG_FCSR + 1)
+
+/*
+ * Size of I/O buffer for gdb packet.
+ * It is sized to hold the contents of all registers.
+ */
+#define BUFMAX                 2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * PT_REGS and FPR: 8 bytes; FCSR: 4 bytes; FCC: 1 byte.
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+#define NUMREGBYTES            ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4)
+
+#define BREAK_INSTR_SIZE       4
+#define CACHE_FLUSH_IS_SAFE    0
+
+/* Register numbers of various important registers. */
+enum dbg_loongarch_regnum {
+       DBG_LOONGARCH_ZERO = 0,
+       DBG_LOONGARCH_RA,
+       DBG_LOONGARCH_TP,
+       DBG_LOONGARCH_SP,
+       DBG_LOONGARCH_A0,
+       DBG_LOONGARCH_FP = 22,
+       DBG_LOONGARCH_S0,
+       DBG_LOONGARCH_S1,
+       DBG_LOONGARCH_S2,
+       DBG_LOONGARCH_S3,
+       DBG_LOONGARCH_S4,
+       DBG_LOONGARCH_S5,
+       DBG_LOONGARCH_S6,
+       DBG_LOONGARCH_S7,
+       DBG_LOONGARCH_S8,
+       DBG_LOONGARCH_ORIG_A0,
+       DBG_LOONGARCH_PC,
+       DBG_LOONGARCH_BADV
+};
+
+void kgdb_breakinst(void);
+void arch_kgdb_breakpoint(void);
+
+#ifdef CONFIG_KGDB
+bool kgdb_breakpoint_handler(struct pt_regs *regs);
+#else /* !CONFIG_KGDB */
+static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; }
+#endif /* CONFIG_KGDB */
+
+#endif /* _ASM_LOONGARCH_KGDB_H */
diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h
new file mode 100644 (file)
index 0000000..e671978
--- /dev/null
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_LBT_H
+#define _ASM_LBT_H
+
+#include <asm/cpu.h>
+#include <asm/current.h>
+#include <asm/loongarch.h>
+#include <asm/processor.h>
+
+extern void _init_lbt(void);
+extern void _save_lbt(struct loongarch_lbt *);
+extern void _restore_lbt(struct loongarch_lbt *);
+
+static inline int is_lbt_enabled(void)
+{
+       if (!cpu_has_lbt)
+               return 0;
+
+       return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ?
+               1 : 0;
+}
+
+static inline int is_lbt_owner(void)
+{
+       return test_thread_flag(TIF_USEDLBT);
+}
+
+#ifdef CONFIG_CPU_HAS_LBT
+
+static inline void enable_lbt(void)
+{
+       if (cpu_has_lbt)
+               csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void disable_lbt(void)
+{
+       if (cpu_has_lbt)
+               csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void __own_lbt(void)
+{
+       enable_lbt();
+       set_thread_flag(TIF_USEDLBT);
+       KSTK_EUEN(current) |= CSR_EUEN_LBTEN;
+}
+
+static inline void own_lbt_inatomic(int restore)
+{
+       if (cpu_has_lbt && !is_lbt_owner()) {
+               __own_lbt();
+               if (restore)
+                       _restore_lbt(&current->thread.lbt);
+       }
+}
+
+static inline void own_lbt(int restore)
+{
+       preempt_disable();
+       own_lbt_inatomic(restore);
+       preempt_enable();
+}
+
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk)
+{
+       if (cpu_has_lbt && is_lbt_owner()) {
+               if (save)
+                       _save_lbt(&tsk->thread.lbt);
+
+               disable_lbt();
+               clear_tsk_thread_flag(tsk, TIF_USEDLBT);
+       }
+       KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN);
+}
+
+static inline void lose_lbt(int save)
+{
+       preempt_disable();
+       lose_lbt_inatomic(save, current);
+       preempt_enable();
+}
+
+static inline void init_lbt(void)
+{
+       __own_lbt();
+       _init_lbt();
+}
+#else
+static inline void own_lbt_inatomic(int restore) {}
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {}
+static inline void init_lbt(void) {}
+static inline void lose_lbt(int save) {}
+#endif
+
+static inline int thread_lbt_context_live(void)
+{
+       if (!cpu_has_lbt)
+               return 0;
+
+       return test_thread_flag(TIF_LBT_CTX_LIVE);
+}
+
+#endif /* _ASM_LBT_H */
index 10748a2..33531d4 100644 (file)
 #ifndef __ASSEMBLY__
 #include <larchintrin.h>
 
-/*
- * parse_r var, r - Helper assembler macro for parsing register names.
- *
- * This converts the register name in $n form provided in \r to the
- * corresponding register number, which is assigned to the variable \var. It is
- * needed to allow explicit encoding of instructions in inline assembly where
- * registers are chosen by the compiler in $n form, allowing us to avoid using
- * fixed register numbers.
- *
- * It also allows newer instructions (not implemented by the assembler) to be
- * transparently implemented using assembler macros, instead of needing separate
- * cases depending on toolchain support.
- *
- * Simple usage example:
- * __asm__ __volatile__("parse_r addr, %0\n\t"
- *                     "#invtlb op, 0, %0\n\t"
- *                     ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)"
- *                     : "=r" (status);
- */
-
-/* Match an individual register number and assign to \var */
-#define _IFC_REG(n)                            \
-       ".ifc   \\r, $r" #n "\n\t"              \
-       "\\var  = " #n "\n\t"                   \
-       ".endif\n\t"
-
-__asm__(".macro        parse_r var r\n\t"
-       "\\var  = -1\n\t"
-       _IFC_REG(0)  _IFC_REG(1)  _IFC_REG(2)  _IFC_REG(3)
-       _IFC_REG(4)  _IFC_REG(5)  _IFC_REG(6)  _IFC_REG(7)
-       _IFC_REG(8)  _IFC_REG(9)  _IFC_REG(10) _IFC_REG(11)
-       _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
-       _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
-       _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
-       _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
-       _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
-       ".iflt  \\var\n\t"
-       ".error \"Unable to parse register name \\r\"\n\t"
-       ".endif\n\t"
-       ".endm");
-
-#undef _IFC_REG
-
 /* CPUCFG */
 #define read_cpucfg(reg) __cpucfg(reg)
 
@@ -1453,6 +1410,10 @@ __BUILD_CSR_OP(tlbidx)
 #define FPU_CSR_RU     0x200   /* towards +Infinity */
 #define FPU_CSR_RD     0x300   /* towards -Infinity */
 
+/* Bit 6 of FPU Status Register specifies the LBT TOP simulation mode */
+#define FPU_CSR_TM_SHIFT       0x6
+#define FPU_CSR_TM             (_ULCAST_(1) << FPU_CSR_TM_SHIFT)
+
 #define read_fcsr(source)      \
 ({     \
        unsigned int __res;     \
index fe67d0b..2b9a907 100644 (file)
@@ -13,6 +13,4 @@ extern struct pglist_data *node_data[];
 
 #define NODE_DATA(nid) (node_data[(nid)])
 
-extern void setup_zero_pages(void);
-
 #endif /* _ASM_MMZONE_H_ */
index 26e8dcc..63f137c 100644 (file)
@@ -84,7 +84,12 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define sym_to_pfn(x)          __phys_to_pfn(__pa_symbol(x))
 
 #define virt_to_pfn(kaddr)     PFN_DOWN(PHYSADDR(kaddr))
-#define virt_to_page(kaddr)    pfn_to_page(virt_to_pfn(kaddr))
+
+#define virt_to_page(kaddr)                                                            \
+({                                                                                     \
+       (likely((unsigned long)kaddr < vm_map_base)) ?                                  \
+       dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
+})
 
 extern int __virt_addr_valid(volatile void *kaddr);
 #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
index 23f5b11..79470f0 100644 (file)
@@ -94,4 +94,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 #endif /* __PAGETABLE_PUD_FOLDED */
 
+extern pte_t * __init populate_kernel_pte(unsigned long addr);
 #endif /* _ASM_PGALLOC_H */
index 06963a1..29d9b12 100644 (file)
@@ -70,12 +70,9 @@ struct vm_area_struct;
  * for zero-mapped memory areas etc..
  */
 
-extern unsigned long empty_zero_page;
-extern unsigned long zero_page_mask;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
-#define ZERO_PAGE(vaddr) \
-       (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-#define __HAVE_COLOR_ZERO_PAGE
+#define ZERO_PAGE(vaddr)       virt_to_page(empty_zero_page)
 
 /*
  * TLB refill handlers may also map the vmalloc area into xkvrange.
@@ -85,14 +82,30 @@ extern unsigned long zero_page_mask;
 #define MODULES_VADDR  (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
 #define MODULES_END    (MODULES_VADDR + SZ_256M)
 
+#ifdef CONFIG_KFENCE
+#define KFENCE_AREA_SIZE       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
+#else
+#define KFENCE_AREA_SIZE       0
+#endif
+
 #define VMALLOC_START  MODULES_END
+
+#ifndef CONFIG_KASAN
 #define VMALLOC_END    \
        (vm_map_base +  \
-        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#else
+#define VMALLOC_END    \
+       (vm_map_base +  \
+        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#endif
 
 #define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
 #define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
 
+#define KFENCE_AREA_START      (VMEMMAP_END + 1)
+#define KFENCE_AREA_END                (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1)
+
 #define pte_ERROR(e) \
        pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -350,6 +363,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 extern pgd_t swapper_pg_dir[];
 extern pgd_t invalid_pg_dir[];
 
+struct page *dmw_virt_to_page(unsigned long kaddr);
+struct page *tlb_virt_to_page(unsigned long kaddr);
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -596,6 +612,9 @@ static inline long pmd_protnone(pmd_t pmd)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+#define pmd_leaf(pmd)          ((pmd_val(pmd) & _PAGE_HUGE) != 0)
+#define pud_leaf(pud)          ((pud_val(pud) & _PAGE_HUGE) != 0)
+
 /*
  * We provide our own get_unmapped area to cope with the virtual aliasing
  * constraints placed on us by the cache architecture.
index 636e1c6..c3bc44b 100644 (file)
@@ -80,11 +80,22 @@ BUILD_FPR_ACCESS(32)
 BUILD_FPR_ACCESS(64)
 
 struct loongarch_fpu {
-       unsigned int    fcsr;
        uint64_t        fcc;    /* 8x8 */
+       uint32_t        fcsr;
+       uint32_t        ftop;
        union fpureg    fpr[NUM_FPU_REGS];
 };
 
+struct loongarch_lbt {
+       /* Scratch registers */
+       unsigned long scr0;
+       unsigned long scr1;
+       unsigned long scr2;
+       unsigned long scr3;
+       /* Eflags register */
+       unsigned long eflags;
+};
+
 #define INIT_CPUMASK { \
        {0,} \
 }
@@ -113,15 +124,6 @@ struct thread_struct {
        unsigned long csr_ecfg;
        unsigned long csr_badvaddr;     /* Last user fault */
 
-       /* Scratch registers */
-       unsigned long scr0;
-       unsigned long scr1;
-       unsigned long scr2;
-       unsigned long scr3;
-
-       /* Eflags register */
-       unsigned long eflags;
-
        /* Other stuff associated with the thread. */
        unsigned long trap_nr;
        unsigned long error_code;
@@ -133,6 +135,7 @@ struct thread_struct {
         * context because they are conditionally copied at fork().
         */
        struct loongarch_fpu fpu FPU_ALIGN;
+       struct loongarch_lbt lbt; /* Also conditionally copied */
 
        /* Hardware breakpoints pinned to this task. */
        struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
@@ -174,8 +177,9 @@ struct thread_struct {
         * FPU & vector registers                               \
         */                                                     \
        .fpu                    = {                             \
-               .fcsr           = 0,                            \
                .fcc            = 0,                            \
+               .fcsr           = 0,                            \
+               .ftop           = 0,                            \
                .fpr            = {{{0,},},},                   \
        },                                                      \
        .hbp_break              = {0},                          \
index be05c0e..a0bc159 100644 (file)
@@ -7,6 +7,7 @@
 #define _LOONGARCH_SETUP_H
 
 #include <linux/types.h>
+#include <asm/sections.h>
 #include <uapi/asm/setup.h>
 
 #define VECSIZE 0x200
@@ -33,8 +34,13 @@ extern long __la_abs_end;
 extern long __rela_dyn_begin;
 extern long __rela_dyn_end;
 
-extern void * __init relocate_kernel(void);
+extern unsigned long __init relocate_kernel(void);
 
 #endif
 
+static inline unsigned long kaslr_offset(void)
+{
+       return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS;
+}
+
 #endif /* __SETUP_H */
index 7df80e6..4fb1e64 100644 (file)
        cfi_st  u0, PT_R21, \docfi
        csrrd   u0, PERCPU_BASE_KS
 9:
+#ifdef CONFIG_KGDB
+       li.w    t0, CSR_CRMD_WE
+       csrxchg t0, t0, LOONGARCH_CSR_CRMD
+#endif
        .endm
 
        .macro  SAVE_ALL docfi=0
index 7b29cc9..5bb5a90 100644 (file)
@@ -7,11 +7,31 @@
 
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *__s, int __c, size_t __count);
+extern void *__memset(void *__s, int __c, size_t __count);
 
 #define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+extern void *__memcpy(void *__to, __const__ void *__from, size_t __n);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+extern void *__memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use the non-instrumented versions of the mem* functions.
+ */
+
+#define memset(s, c, n) __memset(s, c, n)
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
 
 #endif /* _ASM_STRING_H */
index 24e3094..5b225af 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 
 struct task_struct;
 
@@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
 #define switch_to(prev, next, last)                                            \
 do {                                                                           \
        lose_fpu_inatomic(1, prev);                                             \
+       lose_lbt_inatomic(1, prev);                                             \
        hw_breakpoint_thread_switch(next);                                      \
        (last) = __switch_to(prev, next, task_thread_info(next),                \
                 __builtin_return_address(0), __builtin_frame_address(0));      \
index 1a3354c..8cb653d 100644 (file)
@@ -84,6 +84,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define TIF_SINGLESTEP         16      /* Single Step */
 #define TIF_LSX_CTX_LIVE       17      /* LSX context must be preserved */
 #define TIF_LASX_CTX_LIVE      18      /* LASX context must be preserved */
+#define TIF_USEDLBT            19      /* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE       20      /* LBT context must be preserved */
 
 #define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
@@ -101,6 +103,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define _TIF_SINGLESTEP                (1<<TIF_SINGLESTEP)
 #define _TIF_LSX_CTX_LIVE      (1<<TIF_LSX_CTX_LIVE)
 #define _TIF_LASX_CTX_LIVE     (1<<TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT           (1<<TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE      (1<<TIF_LBT_CTX_LIVE)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h
new file mode 100644 (file)
index 0000000..12467ff
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_H
+#define _ASM_LOONGARCH_XOR_H
+
+#include <asm/cpu-features.h>
+#include <asm/xor_simd.h>
+
+#ifdef CONFIG_CPU_HAS_LSX
+static struct xor_block_template xor_block_lsx = {
+       .name = "lsx",
+       .do_2 = xor_lsx_2,
+       .do_3 = xor_lsx_3,
+       .do_4 = xor_lsx_4,
+       .do_5 = xor_lsx_5,
+};
+
+#define XOR_SPEED_LSX()                                        \
+       do {                                            \
+               if (cpu_has_lsx)                        \
+                       xor_speed(&xor_block_lsx);      \
+       } while (0)
+#else /* CONFIG_CPU_HAS_LSX */
+#define XOR_SPEED_LSX()
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static struct xor_block_template xor_block_lasx = {
+       .name = "lasx",
+       .do_2 = xor_lasx_2,
+       .do_3 = xor_lasx_3,
+       .do_4 = xor_lasx_4,
+       .do_5 = xor_lasx_5,
+};
+
+#define XOR_SPEED_LASX()                                       \
+       do {                                                    \
+               if (cpu_has_lasx)                               \
+                       xor_speed(&xor_block_lasx);             \
+       } while (0)
+#else /* CONFIG_CPU_HAS_LASX */
+#define XOR_SPEED_LASX()
+#endif /* CONFIG_CPU_HAS_LASX */
+
+/*
+ * For grins, also test the generic routines.
+ *
+ * More importantly: it cannot be ruled out at this point of time, that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES                              \
+do {                                                   \
+       xor_speed(&xor_block_8regs);                    \
+       xor_speed(&xor_block_8regs_p);                  \
+       xor_speed(&xor_block_32regs);                   \
+       xor_speed(&xor_block_32regs_p);                 \
+       XOR_SPEED_LSX();                                \
+       XOR_SPEED_LASX();                               \
+} while (0)
+
+#endif /* _ASM_LOONGARCH_XOR_H */
diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h
new file mode 100644 (file)
index 0000000..471b963
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_SIMD_H
+#define _ASM_LOONGARCH_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2);
+void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4);
+void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2);
+void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4);
+void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
index 06e3be5..ac915f8 100644 (file)
@@ -56,6 +56,12 @@ struct user_lasx_state {
        uint64_t vregs[32*4];
 };
 
+struct user_lbt_state {
+       uint64_t scr[4];
+       uint32_t eflags;
+       uint32_t ftop;
+};
+
 struct user_watch_state {
        uint64_t dbg_info;
        struct {
index 4cd7d16..6c22f61 100644 (file)
@@ -59,4 +59,14 @@ struct lasx_context {
        __u32   fcsr;
 };
 
+/* LBT context */
+#define LBT_CTX_MAGIC          0x42540001
+#define LBT_CTX_ALIGN          8
+struct lbt_context {
+       __u64   regs[4];
+       __u32   eflags;
+       __u32   ftop;
+};
+
+
 #endif /* _UAPI_ASM_SIGCONTEXT_H */
index 8e279f0..c56ea0b 100644 (file)
@@ -15,6 +15,8 @@ obj-$(CONFIG_EFI)             += efi.o
 
 obj-$(CONFIG_CPU_HAS_FPU)      += fpu.o kfpu.o
 
+obj-$(CONFIG_CPU_HAS_LBT)      += lbt.o
+
 obj-$(CONFIG_ARCH_STRICT_ALIGN)        += unaligned.o
 
 ifdef CONFIG_FUNCTION_TRACER
@@ -32,6 +34,12 @@ ifdef CONFIG_FUNCTION_TRACER
   CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE)
 endif
 
+KASAN_SANITIZE_efi.o := n
+KASAN_SANITIZE_cpu-probe.o := n
+KASAN_SANITIZE_traps.o := n
+KASAN_SANITIZE_smp.o := n
+KASAN_SANITIZE_vdso.o := n
+
 obj-$(CONFIG_MODULES)          += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
 
@@ -54,6 +62,7 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_regs.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 
+obj-$(CONFIG_KGDB)             += kgdb.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
 obj-$(CONFIG_RETHOOK)          += rethook.o rethook_trampoline.o
 obj-$(CONFIG_UPROBES)          += uprobes.o
index 505e4bf..8da0726 100644 (file)
@@ -118,13 +118,6 @@ void output_thread_defines(void)
        OFFSET(THREAD_CSRECFG, task_struct,
               thread.csr_ecfg);
 
-       OFFSET(THREAD_SCR0, task_struct, thread.scr0);
-       OFFSET(THREAD_SCR1, task_struct, thread.scr1);
-       OFFSET(THREAD_SCR2, task_struct, thread.scr2);
-       OFFSET(THREAD_SCR3, task_struct, thread.scr3);
-
-       OFFSET(THREAD_EFLAGS, task_struct, thread.eflags);
-
        OFFSET(THREAD_FPU, task_struct, thread.fpu);
 
        OFFSET(THREAD_BVADDR, task_struct, \
@@ -172,6 +165,17 @@ void output_thread_fpu_defines(void)
 
        OFFSET(THREAD_FCSR, loongarch_fpu, fcsr);
        OFFSET(THREAD_FCC,  loongarch_fpu, fcc);
+       OFFSET(THREAD_FTOP, loongarch_fpu, ftop);
+       BLANK();
+}
+
+void output_thread_lbt_defines(void)
+{
+       OFFSET(THREAD_SCR0,  loongarch_lbt, scr0);
+       OFFSET(THREAD_SCR1,  loongarch_lbt, scr1);
+       OFFSET(THREAD_SCR2,  loongarch_lbt, scr2);
+       OFFSET(THREAD_SCR3,  loongarch_lbt, scr3);
+       OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags);
        BLANK();
 }
 
index e925579..5532081 100644 (file)
@@ -144,6 +144,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
                c->options |= LOONGARCH_CPU_LVZ;
                elf_hwcap |= HWCAP_LOONGARCH_LVZ;
        }
+#ifdef CONFIG_CPU_HAS_LBT
+       if (config & CPUCFG2_X86BT) {
+               c->options |= LOONGARCH_CPU_LBT_X86;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_X86;
+       }
+       if (config & CPUCFG2_ARMBT) {
+               c->options |= LOONGARCH_CPU_LBT_ARM;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM;
+       }
+       if (config & CPUCFG2_MIPSBT) {
+               c->options |= LOONGARCH_CPU_LBT_MIPS;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS;
+       }
+#endif
 
        config = read_cpucfg(LOONGARCH_CPUCFG6);
        if (config & CPUCFG6_PMP)
index d737e3c..65518bb 100644 (file)
@@ -58,6 +58,11 @@ SYM_FUNC_START(handle_syscall)
 
        SAVE_STATIC
 
+#ifdef CONFIG_KGDB
+       li.w            t1, CSR_CRMD_WE
+       csrxchg         t1, t1, LOONGARCH_CSR_CRMD
+#endif
+
        move            u0, t0
        li.d            tp, ~_THREAD_MASK
        and             tp, tp, sp
index 501094a..d53ab10 100644 (file)
@@ -22,7 +22,7 @@
 
        .macro  EX insn, reg, src, offs
 .ex\@: \insn   \reg, \src, \offs
-       _asm_extable .ex\@, fault
+       _asm_extable .ex\@, .L_fpu_fault
        .endm
 
        .macro sc_save_fp base
        .macro sc_save_fcsr base, tmp0
        movfcsr2gr      \tmp0, fcsr0
        EX      st.w    \tmp0, \base, 0
+#if defined(CONFIG_CPU_HAS_LBT)
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp0, \tmp0, FPU_CSR_TM
+       beqz            \tmp0, 1f
+       x86clrtm
+1:
+#endif
        .endm
 
        .macro sc_restore_fcsr base, tmp0
@@ -309,7 +316,7 @@ EXPORT_SYMBOL(_save_fp)
  */
 SYM_FUNC_START(_restore_fp)
        fpu_restore_double      a0 t1           # clobbers t1
-       fpu_restore_csr         a0 t1
+       fpu_restore_csr         a0 t1 t2
        fpu_restore_cc          a0 t1 t2        # clobbers t1, t2
        jr                      ra
 SYM_FUNC_END(_restore_fp)
@@ -514,7 +521,6 @@ SYM_FUNC_START(_restore_lasx_context)
        jr      ra
 SYM_FUNC_END(_restore_lasx_context)
 
-SYM_FUNC_START(fault)
+.L_fpu_fault:
        li.w    a0, -EFAULT                             # failure
        jr      ra
-SYM_FUNC_END(fault)
index 5e828a8..53b883d 100644 (file)
@@ -95,12 +95,17 @@ SYM_CODE_START(kernel_entry)                        # kernel entry point
        PTR_LI          sp, (_THREAD_SIZE - PT_SIZE)
        PTR_ADD         sp, sp, tp
        set_saved_sp    sp, t0, t1
-#endif
 
-       /* relocate_kernel() returns the new kernel entry point */
-       jr              a0
-       ASM_BUG()
+       /* Jump to the new kernel: new_pc = current_pc + random_offset */
+       pcaddi          t0, 0
+       add.d           t0, t0, a0
+       jirl            zero, t0, 0xc
+#endif /* CONFIG_RANDOMIZE_BASE */
+
+#endif /* CONFIG_RELOCATABLE */
 
+#ifdef CONFIG_KASAN
+       bl              kasan_early_init
 #endif
 
        bl              start_kernel
index 5c46ae8..ec5b28e 100644 (file)
@@ -8,19 +8,40 @@
 #include <asm/fpu.h>
 #include <asm/smp.h>
 
+static unsigned int euen_mask = CSR_EUEN_FPEN;
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example.
+ */
 static DEFINE_PER_CPU(bool, in_kernel_fpu);
+static DEFINE_PER_CPU(unsigned int, euen_current);
 
 void kernel_fpu_begin(void)
 {
+       unsigned int *euen_curr;
+
        preempt_disable();
 
        WARN_ON(this_cpu_read(in_kernel_fpu));
 
        this_cpu_write(in_kernel_fpu, true);
+       euen_curr = this_cpu_ptr(&euen_current);
 
-       if (!is_fpu_owner())
-               enable_fpu();
+       *euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+       if (*euen_curr & CSR_EUEN_LASXEN)
+               _save_lasx(&current->thread.fpu);
+       else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       if (*euen_curr & CSR_EUEN_LSXEN)
+               _save_lsx(&current->thread.fpu);
        else
+#endif
+       if (*euen_curr & CSR_EUEN_FPEN)
                _save_fp(&current->thread.fpu);
 
        write_fcsr(LOONGARCH_FCSR0, 0);
@@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
+       unsigned int *euen_curr;
+
        WARN_ON(!this_cpu_read(in_kernel_fpu));
 
-       if (!is_fpu_owner())
-               disable_fpu();
+       euen_curr = this_cpu_ptr(&euen_current);
+
+#ifdef CONFIG_CPU_HAS_LASX
+       if (*euen_curr & CSR_EUEN_LASXEN)
+               _restore_lasx(&current->thread.fpu);
        else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       if (*euen_curr & CSR_EUEN_LSXEN)
+               _restore_lsx(&current->thread.fpu);
+       else
+#endif
+       if (*euen_curr & CSR_EUEN_FPEN)
                _restore_fp(&current->thread.fpu);
 
+       *euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN);
+
        this_cpu_write(in_kernel_fpu, false);
 
        preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
+/*
+ * Extend euen_mask (which starts out as CSR_EUEN_FPEN only) with the LSX
+ * and LASX enable bits when the CPU reports those features, so that
+ * kernel_fpu_begin()/kernel_fpu_end() toggle every available unit.
+ * Registered as an arch_initcall; always returns 0.
+ */
+static int __init init_euen_mask(void)
+{
+       if (cpu_has_lsx)
+               euen_mask |= CSR_EUEN_LSXEN;
+
+       if (cpu_has_lasx)
+               euen_mask |= CSR_EUEN_LASXEN;
+
+       return 0;
+}
+arch_initcall(init_euen_mask);
diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c
new file mode 100644 (file)
index 0000000..445c452
--- /dev/null
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch KGDB support
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <linux/processor.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fpu.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/inst.h>
+#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+
+int kgdb_watch_activated;
+static unsigned int stepped_opcode;
+static unsigned long stepped_address;
+
+/*
+ * Register table used by dbg_get_reg()/dbg_set_reg(): name, size in bytes
+ * and an "offset" whose meaning depends on the register class:
+ *  - r0-r31, orig_a0, pc, badv: byte offset into struct pt_regs;
+ *  - f0-f31: index into current->thread.fpu.fpr[];
+ *  - fcc0-fcc7: byte offset from the start of current->thread.fpu.fcc;
+ *  - fcsr: not used, the fcsr field is accessed directly.
+ */
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+       { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) },
+       { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) },
+       { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) },
+       { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) },
+       { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) },
+       { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) },
+       { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) },
+       { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) },
+       { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) },
+       { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) },
+       { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) },
+       { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) },
+       { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) },
+       { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) },
+       { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) },
+       { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) },
+       { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) },
+       { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) },
+       { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) },
+       { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) },
+       { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) },
+       { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) },
+       { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) },
+       { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) },
+       { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) },
+       { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) },
+       { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) },
+       { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) },
+       { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) },
+       { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) },
+       { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) },
+       { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) },
+       { "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) },
+       { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) },
+       { "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) },
+       { "f0", GDB_SIZEOF_REG, 0 },
+       { "f1", GDB_SIZEOF_REG, 1 },
+       { "f2", GDB_SIZEOF_REG, 2 },
+       { "f3", GDB_SIZEOF_REG, 3 },
+       { "f4", GDB_SIZEOF_REG, 4 },
+       { "f5", GDB_SIZEOF_REG, 5 },
+       { "f6", GDB_SIZEOF_REG, 6 },
+       { "f7", GDB_SIZEOF_REG, 7 },
+       { "f8", GDB_SIZEOF_REG, 8 },
+       { "f9", GDB_SIZEOF_REG, 9 },
+       { "f10", GDB_SIZEOF_REG, 10 },
+       { "f11", GDB_SIZEOF_REG, 11 },
+       { "f12", GDB_SIZEOF_REG, 12 },
+       { "f13", GDB_SIZEOF_REG, 13 },
+       { "f14", GDB_SIZEOF_REG, 14 },
+       { "f15", GDB_SIZEOF_REG, 15 },
+       { "f16", GDB_SIZEOF_REG, 16 },
+       { "f17", GDB_SIZEOF_REG, 17 },
+       { "f18", GDB_SIZEOF_REG, 18 },
+       { "f19", GDB_SIZEOF_REG, 19 },
+       { "f20", GDB_SIZEOF_REG, 20 },
+       { "f21", GDB_SIZEOF_REG, 21 },
+       { "f22", GDB_SIZEOF_REG, 22 },
+       { "f23", GDB_SIZEOF_REG, 23 },
+       { "f24", GDB_SIZEOF_REG, 24 },
+       { "f25", GDB_SIZEOF_REG, 25 },
+       { "f26", GDB_SIZEOF_REG, 26 },
+       { "f27", GDB_SIZEOF_REG, 27 },
+       { "f28", GDB_SIZEOF_REG, 28 },
+       { "f29", GDB_SIZEOF_REG, 29 },
+       { "f30", GDB_SIZEOF_REG, 30 },
+       { "f31", GDB_SIZEOF_REG, 31 },
+       { "fcc0", 1, 0 },
+       { "fcc1", 1, 1 },
+       { "fcc2", 1, 2 },
+       { "fcc3", 1, 3 },
+       { "fcc4", 1, 4 },
+       { "fcc5", 1, 5 },
+       { "fcc6", 1, 6 },
+       { "fcc7", 1, 7 },
+       { "fcsr", 4, 0 },
+};
+
+/*
+ * Copy the value of debugger register @regno into @mem.
+ *
+ * Registers up to DBG_PT_REGS_END are read from @regs. Floating-point
+ * registers are read from current->thread.fpu after the live FP state has
+ * been spilled there with save_fp(); they are only read when the saved
+ * EUEN in @regs has the FPU enabled, otherwise @mem is left untouched.
+ *
+ * Returns the register name, or NULL if @regno is out of range.
+ */
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+       int reg_offset, reg_size;
+
+       if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+               return NULL;
+
+       reg_offset = dbg_reg_def[regno].offset;
+       reg_size = dbg_reg_def[regno].size;
+
+       /* An offset of -1 marks a register with no backing storage */
+       if (reg_offset == -1)
+               goto out;
+
+       /* Handle general-purpose/orig_a0/pc/badv registers */
+       if (regno <= DBG_PT_REGS_END) {
+               memcpy(mem, (void *)regs + reg_offset, reg_size);
+               goto out;
+       }
+
+       /* FP registers are only reported if the FPU was enabled in the saved EUEN */
+       if (!(regs->csr_euen & CSR_EUEN_FPEN))
+               goto out;
+
+       /* Spill the live FP registers so thread.fpu holds current values */
+       save_fp(current);
+
+       /* Handle FP registers */
+       switch (regno) {
+       case DBG_FCSR:                          /* Process the fcsr */
+               memcpy(mem, (void *)&current->thread.fpu.fcsr, reg_size);
+               break;
+       case DBG_FCC_BASE ... DBG_FCC_END:      /* Process the fcc */
+               memcpy(mem, (void *)&current->thread.fpu.fcc + reg_offset, reg_size);
+               break;
+       case DBG_FPR_BASE ... DBG_FPR_END:      /* Process the fpr */
+               memcpy(mem, (void *)&current->thread.fpu.fpr[reg_offset], reg_size);
+               break;
+       default:
+               break;
+       }
+
+out:
+       return dbg_reg_def[regno].name;
+}
+
+/*
+ * Write the value at @mem into debugger register @regno.
+ *
+ * Registers up to DBG_PT_REGS_END are written into @regs. Floating-point
+ * registers are updated through current->thread.fpu: the live FP state is
+ * first spilled with save_fp(), the requested field is patched, and the
+ * whole state is reloaded with restore_fp(). FP writes are silently
+ * ignored when the saved EUEN in @regs has the FPU disabled.
+ *
+ * Returns 0 on success (including ignored writes), or -EINVAL if @regno is
+ * out of range.
+ */
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+       int reg_offset, reg_size;
+
+       if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+               return -EINVAL;
+
+       reg_offset = dbg_reg_def[regno].offset;
+       reg_size = dbg_reg_def[regno].size;
+
+       /* An offset of -1 marks a register with no backing storage */
+       if (reg_offset == -1)
+               return 0;
+
+       /* Handle general-purpose/orig_a0/pc/badv registers */
+       if (regno <= DBG_PT_REGS_END) {
+               memcpy((void *)regs + reg_offset, mem, reg_size);
+               return 0;
+       }
+
+       if (!(regs->csr_euen & CSR_EUEN_FPEN))
+               return 0;
+
+       /*
+        * restore_fp() below reloads every FP register from thread.fpu, so
+        * bring the save area up to date first. Otherwise the registers we
+        * are not modifying would be overwritten with stale values whenever
+        * no dbg_get_reg() call preceded this one.
+        */
+       save_fp(current);
+
+       /* Handle FP registers */
+       switch (regno) {
+       case DBG_FCSR:                          /* Process the fcsr */
+               memcpy((void *)&current->thread.fpu.fcsr, mem, reg_size);
+               break;
+       case DBG_FCC_BASE ... DBG_FCC_END:      /* Process the fcc */
+               memcpy((void *)&current->thread.fpu.fcc + reg_offset, mem, reg_size);
+               break;
+       case DBG_FPR_BASE ... DBG_FPR_END:      /* Process the fpr */
+               memcpy((void *)&current->thread.fpu.fpr[reg_offset], mem, reg_size);
+               break;
+       default:
+               break;
+       }
+
+       /* Load the patched state back into the FP registers */
+       restore_fp(current);
+
+       return 0;
+}
+
+/*
+ * Similar to regs_to_gdb_regs() except that process is sleeping and so
+ * we may not be able to get all the info.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+       /* Initialize to zero; registers not filled in below are reported as 0 */
+       memset((char *)gdb_regs, 0, NUMREGBYTES);
+
+       /* Only the values kept in p->thread (reg01, reg03, reg23-reg31) are available */
+       gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01;
+       gdb_regs[DBG_LOONGARCH_TP] = (long)p;
+       gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03;
+
+       /* S0 - S8 */
+       gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23;
+       gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24;
+       gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25;
+       gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26;
+       gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27;
+       gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28;
+       gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29;
+       gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30;
+       gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31;
+
+       /*
+        * PC use return address (RA), i.e. the moment after return from __switch_to()
+        */
+       gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01;
+}
+
+/* Set the resume address: the saved ERA in @regs is what dbg_reg_def exposes as "pc". */
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+       regs->csr_era = pc;
+}
+
+/*
+ * Execute a compiled-in breakpoint. The break instruction is labelled with
+ * the global symbol kgdb_breakinst so that kgdb_loongarch_notify() can
+ * recognise it and step the saved pc past it.
+ */
+void arch_kgdb_breakpoint(void)
+{
+       __asm__ __volatile__ (                  \
+               ".globl kgdb_breakinst\n\t"     \
+               "nop\n"                         \
+               "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */
+}
+
+/*
+ * Die-notifier callback, also called directly by kgdb_breakpoint_handler().
+ * For kernel-mode events, and only once a KGDB I/O module is registered,
+ * try to fall into the debugger.
+ */
+static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+       struct die_args *args = (struct die_args *)ptr;
+       struct pt_regs *regs = args->regs;
+
+       /* Userspace events, ignore. */
+       if (user_mode(regs))
+               return NOTIFY_DONE;
+
+       /* Without a registered I/O module there is no debugger to talk to */
+       if (!kgdb_io_module_registered)
+               return NOTIFY_DONE;
+
+       /* kgdb_active != -1: the debugger is already active, report in via the NMI callback */
+       if (atomic_read(&kgdb_active) != -1)
+               kgdb_nmicallback(smp_processor_id(), regs);
+
+       /* A non-zero return means KGDB did not handle the event */
+       if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs))
+               return NOTIFY_DONE;
+
+       /* Resume after the compiled-in break from arch_kgdb_breakpoint() */
+       if (atomic_read(&kgdb_setting_breakpoint))
+               if (regs->csr_era == (unsigned long)&kgdb_breakinst)
+                       regs->csr_era += LOONGARCH_INSN_SIZE;
+
+       return NOTIFY_STOP;
+}
+
+/*
+ * Entry point for a BRK_KDB break exception: wrap @regs in a die_args
+ * record and pass it to kgdb_loongarch_notify() as a DIE_TRAP event.
+ * Returns true when the notifier answered NOTIFY_STOP.
+ */
+bool kgdb_breakpoint_handler(struct pt_regs *regs)
+{
+       int verdict;
+       struct die_args args = {
+               .regs   = regs,
+               .str    = "Break",
+               .err    = BRK_KDB,
+               .trapnr = read_csr_excode(),
+               .signr  = SIGTRAP,
+       };
+
+       verdict = kgdb_loongarch_notify(NULL, DIE_TRAP, &args);
+
+       return verdict == NOTIFY_STOP;
+}
+
+/* Die notifier registered by kgdb_arch_init() and removed by kgdb_arch_exit() */
+static struct notifier_block kgdb_notifier = {
+       .notifier_call = kgdb_loongarch_notify,
+};
+
+/*
+ * Parse the optional hex address that follows the one-character command in
+ * @remcom_in_buffer. If one is present, make it the resume pc.
+ */
+static inline void kgdb_arch_update_addr(struct pt_regs *regs,
+                                        char *remcom_in_buffer)
+{
+       char *cursor = remcom_in_buffer + 1;
+       unsigned long new_pc;
+
+       if (!kgdb_hex2long(&cursor, &new_pc))
+               return;
+
+       regs->csr_era = new_pc;
+}
+
+/*
+ * Calculate the address execution reaches after the instruction at the
+ * current pc, so that do_single_step() can plant a breakpoint there.
+ *
+ * The default is the next sequential instruction. Branches and jumps are
+ * decoded by hand and their condition is evaluated against @regs (and, for
+ * bceqz/bcnez, against the FP condition flag read through dbg_get_reg()).
+ * For bl and jirl the link register in @regs is updated as well.
+ *
+ * Returns 0 with *next_addr filled in, or -EINVAL if pc is not aligned to
+ * a 4-byte boundary.
+ */
+static int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
+{
+       /*
+        * dbg_get_reg() leaves its buffer untouched when the FPU is disabled,
+        * so start from a defined value.
+        */
+       char cj_val = 0;
+       unsigned int si, si_l, si_h, rd, rj, cj;
+       unsigned long pc = instruction_pointer(regs);
+       union loongarch_instruction *ip = (union loongarch_instruction *)pc;
+
+       if (pc & 3) {
+               pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+               return -EINVAL;
+       }
+
+       *next_addr = pc + LOONGARCH_INSN_SIZE;
+
+       /* b/bl: 26-bit immediate, shifted left by 2 (sign bit is bit 27) */
+       si_h = ip->reg0i26_format.immediate_h;
+       si_l = ip->reg0i26_format.immediate_l;
+       switch (ip->reg0i26_format.opcode) {
+       case b_op:
+               *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+               return 0;
+       case bl_op:
+               *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+               regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
+               return 0;
+       }
+
+       /* beqz/bnez/bceqz/bcnez: 21-bit immediate (sign bit is bit 22) */
+       rj = ip->reg1i21_format.rj;
+       cj = (rj & 0x07) + DBG_FCC_BASE;
+       si_l = ip->reg1i21_format.immediate_l;
+       si_h = ip->reg1i21_format.immediate_h;
+       dbg_get_reg(cj, &cj_val, regs);
+       switch (ip->reg1i21_format.opcode) {
+       case beqz_op:
+               if (regs->regs[rj] == 0)
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       case bnez_op:
+               if (regs->regs[rj] != 0)
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       case bceqz_op: /* bceqz_op = bcnez_op */
+               /* Bits 4:3 of the rj field tell bceqz and bcnez apart */
+               if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       }
+
+       /* Two-register branches and jirl: 16-bit immediate (sign bit is bit 17) */
+       rj = ip->reg2i16_format.rj;
+       rd = ip->reg2i16_format.rd;
+       si = ip->reg2i16_format.immediate;
+       switch (ip->reg2i16_format.opcode) {
+       case beq_op:
+               if (regs->regs[rj] == regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bne_op:
+               if (regs->regs[rj] != regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case blt_op:
+               if ((long)regs->regs[rj] < (long)regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bge_op:
+               if ((long)regs->regs[rj] >= (long)regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bltu_op:
+               if (regs->regs[rj] < regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bgeu_op:
+               if (regs->regs[rj] >= regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case jirl_op:
+               /*
+                * Read the base register before touching the link register:
+                * rd and rj may name the same register (e.g. jirl ra, ra, 0),
+                * in which case writing rd first would both yield a wrong
+                * target here and corrupt the register the real instruction
+                * is about to jump through.
+                */
+               *next_addr = regs->regs[rj] + sign_extend64(si << 2, 17);
+               /* r0 is the zero register (jirl zero, ra, 0); leave its slot alone */
+               if (rd)
+                       regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
+               return 0;
+       }
+
+       return 0;
+}
+
+/*
+ * Arm a software single step: work out where the instruction at the current
+ * pc leads, remember the opcode found there and overwrite it with the KGDB
+ * break instruction. Returns 0 on success or the error from the failing
+ * step; on a failed write the saved step state is cleared again.
+ */
+static int do_single_step(struct pt_regs *regs)
+{
+       int error = 0;
+       unsigned long addr = 0; /* Determine where the target instruction will send us to */
+
+       error = get_step_address(regs, &addr);
+       if (error)
+               return error;
+
+       /* Store the opcode in the stepped address */
+       error = get_kernel_nofault(stepped_opcode, (void *)addr);
+       if (error)
+               return error;
+
+       stepped_address = addr;
+
+       /* Replace the opcode with the break instruction */
+       error = copy_to_kernel_nofault((void *)stepped_address,
+                                      arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+       /* Flushed unconditionally, i.e. also when the write above failed */
+       flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
+
+       if (error) {
+               stepped_opcode = 0;
+               stepped_address = 0;
+       } else {
+               kgdb_single_step = 1;
+               atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+       }
+
+       return error;
+}
+
+/* Undo a single step */
+static void undo_single_step(struct pt_regs *regs)
+{
+       /* A non-zero stepped_opcode means do_single_step() saved an instruction to put back */
+       if (stepped_opcode) {
+               copy_to_kernel_nofault((void *)stepped_address,
+                                      (void *)&stepped_opcode, BREAK_INSTR_SIZE);
+               flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE);
+       }
+
+       /* Reset all single-step bookkeeping, whether or not a step was pending */
+       stepped_opcode = 0;
+       stepped_address = 0;
+       kgdb_single_step = 0;
+       atomic_set(&kgdb_cpu_doing_single_step, -1);
+}
+
+/*
+ * Handle the architecture-specific part of a GDB remote command.
+ *
+ * Any pending single step is undone first. 'c' resumes (optionally at a new
+ * address), 's' additionally arms a single step, and 'D'/'k' resume with
+ * CSR_PRMD_PWE cleared in the saved PRMD. Returns 0 when the command was
+ * handled, the do_single_step() error for a failed 's', or -1 for any other
+ * command.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+                              char *remcom_in_buffer, char *remcom_out_buffer,
+                              struct pt_regs *regs)
+{
+       int ret = 0;
+
+       undo_single_step(regs);
+       /* NOTE(review): presumably keeps watchpoints enabled after exception return - confirm */
+       regs->csr_prmd |= CSR_PRMD_PWE;
+
+       switch (remcom_in_buffer[0]) {
+       case 'D':
+       case 'k':
+               regs->csr_prmd &= ~CSR_PRMD_PWE;
+               fallthrough;
+       case 'c':
+               kgdb_arch_update_addr(regs, remcom_in_buffer);
+               break;
+       case 's':
+               kgdb_arch_update_addr(regs, remcom_in_buffer);
+               ret = do_single_step(regs);
+               break;
+       default:
+               ret = -1;
+       }
+
+       return ret;
+}
+
+/*
+ * KGDB's view of the hardware breakpoint/watchpoint slots. Each slot records
+ * what the debugger asked for; the per-CPU perf events in pev are allocated
+ * by kgdb_arch_late() and programmed by kgdb_correct_hw_break().
+ */
+static struct hw_breakpoint {
+       unsigned int            enabled;        /* slot is in use by the debugger */
+       unsigned long           addr;           /* address to break/watch */
+       int                     len;            /* HW_BREAKPOINT_LEN_* */
+       int                     type;           /* HW_BREAKPOINT_X/R/W/RW */
+       struct perf_event       * __percpu *pev;        /* NULL until allocated */
+} breakinfo[LOONGARCH_MAX_BRP];
+
+/*
+ * Reserve a hardware breakpoint slot on every online CPU for breakinfo
+ * entry @breakno. If any CPU fails, the slots already reserved on the
+ * preceding CPUs are released again. Returns 0 on success, -1 on failure.
+ */
+static int hw_break_reserve_slot(int breakno)
+{
+       int cpu, cnt = 0;
+       struct perf_event **pevent;
+
+       /*
+        * pev stays NULL for entries kgdb_arch_late() could not allocate (or
+        * has not allocated yet); there is no perf event to reserve then.
+        */
+       if (!breakinfo[breakno].pev)
+               return -1;
+
+       for_each_online_cpu(cpu) {
+               cnt++;
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               if (dbg_reserve_bp_slot(*pevent))
+                       goto fail;
+       }
+
+       return 0;
+
+fail:
+       /* cnt counts the failing CPU too, so this releases the cnt - 1 CPUs before it */
+       for_each_online_cpu(cpu) {
+               cnt--;
+               if (!cnt)
+                       break;
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               dbg_release_bp_slot(*pevent);
+       }
+
+       return -1;
+}
+
+/*
+ * Release the hardware breakpoint slot of breakinfo entry @breakno on every
+ * online CPU. Nothing is done while dbg_is_early is set. Returns 0 on
+ * success, or -1 as soon as one CPU fails to release its slot.
+ */
+static int hw_break_release_slot(int breakno)
+{
+       int cpu;
+
+       if (dbg_is_early)
+               return 0;
+
+       for_each_online_cpu(cpu) {
+               struct perf_event **slot = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+
+               /* On failure the debugger is responsible for retrying the removal */
+               if (dbg_release_bp_slot(*slot))
+                       return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Record a hardware breakpoint/watchpoint request in the first free
+ * breakinfo slot and reserve the matching hardware slot on all online CPUs.
+ * Returns 0 on success, or -1 if no slot is free, @bptype or @len is not
+ * supported, or the reservation fails.
+ */
+static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+       int i;
+
+       /* Find the first slot the debugger is not using */
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+               if (!breakinfo[i].enabled)
+                       break;
+
+       if (i == LOONGARCH_MAX_BRP)
+               return -1;
+
+       /* Translate the GDB breakpoint type to the perf HW_BREAKPOINT_* type */
+       switch (bptype) {
+       case BP_HARDWARE_BREAKPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_X;
+               break;
+       case BP_READ_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_R;
+               break;
+       case BP_WRITE_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_W;
+               break;
+       case BP_ACCESS_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_RW;
+               break;
+       default:
+               return -1;
+       }
+
+       /* Only 1, 2, 4 and 8 byte ranges are accepted */
+       switch (len) {
+       case 1:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_1;
+               break;
+       case 2:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_2;
+               break;
+       case 4:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_4;
+               break;
+       case 8:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_8;
+               break;
+       default:
+               return -1;
+       }
+
+       breakinfo[i].addr = addr;
+       if (hw_break_reserve_slot(i)) {
+               breakinfo[i].addr = 0;
+               return -1;
+       }
+       /* The slot only counts as in use once the reservation succeeded */
+       breakinfo[i].enabled = 1;
+
+       return 0;
+}
+
+/*
+ * Drop the enabled breakinfo entry whose address equals @addr and release
+ * its hardware slot. @len and @bptype are not consulted. Returns 0 on
+ * success, or -1 if no such entry exists or the release fails.
+ */
+static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+       int slot = 0;
+
+       while (slot < LOONGARCH_MAX_BRP) {
+               if (breakinfo[slot].enabled && breakinfo[slot].addr == addr)
+                       break;
+               slot++;
+       }
+
+       if (slot >= LOONGARCH_MAX_BRP)
+               return -1;
+
+       if (hw_break_release_slot(slot) != 0) {
+               pr_err("Cannot remove hw breakpoint at %lx\n", addr);
+               return -1;
+       }
+
+       breakinfo[slot].enabled = 0;
+       return 0;
+}
+
+/*
+ * Uninstall every enabled hardware breakpoint that is currently installed
+ * on this CPU and mark its perf event disabled, then clear CSR_CRMD_WE.
+ * The breakinfo entries stay enabled so kgdb_correct_hw_break() can install
+ * them again.
+ */
+static void kgdb_disable_hw_break(struct pt_regs *regs)
+{
+       int i;
+       int cpu = raw_smp_processor_id();
+       struct perf_event *bp;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               /* Already uninstalled on this CPU */
+               if (bp->attr.disabled == 1)
+                       continue;
+
+               arch_uninstall_hw_breakpoint(bp);
+               bp->attr.disabled = 1;
+       }
+
+       /* Disable hardware debugging while we are in kgdb */
+       csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+}
+
+/*
+ * Tear down the debugger's hardware breakpoints on this CPU, clear
+ * CSR_CRMD_WE and reset kgdb_watch_activated.
+ */
+static void kgdb_remove_all_hw_break(void)
+{
+       int i;
+       int cpu = raw_smp_processor_id();
+       struct perf_event *bp;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               /*
+                * NOTE(review): a breakpoint that is still installed is only
+                * uninstalled here; because of the continue its slot is not
+                * released and the entry stays enabled - confirm this is
+                * intended.
+                */
+               if (!bp->attr.disabled) {
+                       arch_uninstall_hw_breakpoint(bp);
+                       bp->attr.disabled = 1;
+                       continue;
+               }
+
+               if (hw_break_release_slot(i))
+                       pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr);
+               breakinfo[i].enabled = 0;
+       }
+
+       csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+       kgdb_watch_activated = 0;
+}
+
+/*
+ * Program this CPU's hardware with the debugger's enabled breakpoints: copy
+ * address, length and type from breakinfo into each disabled perf event,
+ * parse and install it, then set or clear CSR_CRMD_WE and
+ * kgdb_watch_activated accordingly.
+ */
+static void kgdb_correct_hw_break(void)
+{
+       int i, activated = 0;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               struct perf_event *bp;
+               int val;
+               int cpu = raw_smp_processor_id();
+
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               /* Only events currently marked disabled are (re)installed */
+               if (bp->attr.disabled != 1)
+                       continue;
+
+               bp->attr.bp_addr = breakinfo[i].addr;
+               bp->attr.bp_len = breakinfo[i].len;
+               bp->attr.bp_type = breakinfo[i].type;
+
+               val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp));
+               /*
+                * NOTE(review): returning here skips the CRMD.WE and
+                * kgdb_watch_activated update below, even if earlier
+                * iterations installed breakpoints - confirm intended.
+                */
+               if (val)
+                       return;
+
+               val = arch_install_hw_breakpoint(bp);
+               if (!val)
+                       bp->attr.disabled = 0;
+               /* NOTE(review): set even when the install above failed - confirm */
+               activated = 1;
+       }
+
+       csr_xchg32(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+       kgdb_watch_activated = activated;
+}
+
+/*
+ * Architecture hooks handed to the KGDB core. gdb_bpt_instr holds the bytes
+ * of the break instruction that do_single_step() writes over the stepped
+ * instruction; the remaining members wire up the hardware breakpoint
+ * helpers defined in this file.
+ */
+const struct kgdb_arch arch_kgdb_ops = {
+       .gdb_bpt_instr          = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */
+       .flags                  = KGDB_HW_BREAKPOINT,
+       .set_hw_breakpoint      = kgdb_set_hw_break,
+       .remove_hw_breakpoint   = kgdb_remove_hw_break,
+       .disable_hw_break       = kgdb_disable_hw_break,
+       .remove_all_hw_break    = kgdb_remove_all_hw_break,
+       .correct_hw_break       = kgdb_correct_hw_break,
+};
+
+/* Register the KGDB die notifier; returns the result of register_die_notifier(). */
+int kgdb_arch_init(void)
+{
+       return register_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * Allocate the per-CPU perf events behind every breakinfo entry that does
+ * not have them yet. The events are created disabled with a placeholder
+ * address; the real attributes are filled in by kgdb_correct_hw_break().
+ * On allocation failure the entry's pev is reset to NULL and the function
+ * returns, leaving any remaining entries unallocated.
+ */
+void kgdb_arch_late(void)
+{
+       int i, cpu;
+       struct perf_event_attr attr;
+       struct perf_event **pevent;
+
+       hw_breakpoint_init(&attr);
+
+       /* Placeholder attributes: any valid kernel address will do at this point */
+       attr.bp_addr = (unsigned long)kgdb_arch_init;
+       attr.bp_len = HW_BREAKPOINT_LEN_4;
+       attr.bp_type = HW_BREAKPOINT_W;
+       attr.disabled = 1;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               /* Already allocated by an earlier call */
+               if (breakinfo[i].pev)
+                       continue;
+
+               breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
+               if (IS_ERR((void * __force)breakinfo[i].pev)) {
+                       pr_err("kgdb: Could not allocate hw breakpoints.\n");
+                       breakinfo[i].pev = NULL;
+                       return;
+               }
+
+               /*
+                * NOTE(review): presumably hands the slot reserved at
+                * registration back, so it is only held while the debugger
+                * reserves it via hw_break_reserve_slot() - confirm.
+                */
+               for_each_online_cpu(cpu) {
+                       pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
+                       if (pevent[0]->destroy) {
+                               pevent[0]->destroy = NULL;
+                               release_bp_slot(*pevent);
+                       }
+               }
+       }
+}
+
+/*
+ * Undo kgdb_arch_late() and kgdb_arch_init(): free the per-CPU perf events
+ * of every breakinfo entry that has them, then remove the die notifier.
+ */
+void kgdb_arch_exit(void)
+{
+       int slot;
+
+       for (slot = 0; slot < LOONGARCH_MAX_BRP; slot++) {
+               if (!breakinfo[slot].pev)
+                       continue;
+
+               unregister_wide_hw_breakpoint(breakinfo[slot].pev);
+               breakinfo[slot].pev = NULL;
+       }
+
+       unregister_die_notifier(&kgdb_notifier);
+}
diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
new file mode 100644 (file)
index 0000000..9c75120
--- /dev/null
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ *
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/regdef.h>
+
+#define SCR_REG_WIDTH 8
+
+/*
+ * Emit "\insn \reg, \src, \offs" under a unique local label and add an
+ * exception-table entry that redirects a fault on that instruction to
+ * .L_lbt_fault.
+ */
+       .macro  EX insn, reg, src, offs
+.ex\@: \insn   \reg, \src, \offs
+       _asm_extable .ex\@, .L_lbt_fault
+       .endm
+
+/*
+ * Save a thread's lbt context.
+ *
+ * a0: base pointer; $scr0..$scr3 are stored at offsets THREAD_SCR0..
+ *     THREAD_SCR3 and the eflags value at THREAD_EFLAGS.
+ *
+ * Clobbers t1. The stores are not wrapped in EX.
+ */
+SYM_FUNC_START(_save_lbt)
+       movscr2gr       t1, $scr0               # save scr
+       stptr.d         t1, a0, THREAD_SCR0
+       movscr2gr       t1, $scr1
+       stptr.d         t1, a0, THREAD_SCR1
+       movscr2gr       t1, $scr2
+       stptr.d         t1, a0, THREAD_SCR2
+       movscr2gr       t1, $scr3
+       stptr.d         t1, a0, THREAD_SCR3
+
+       x86mfflag       t1, 0x3f                # save eflags
+       stptr.d         t1, a0, THREAD_EFLAGS
+       jr              ra
+SYM_FUNC_END(_save_lbt)
+EXPORT_SYMBOL(_save_lbt)
+
+/*
+ * Restore a thread's lbt context.
+ *
+ * a0: base pointer; $scr0..$scr3 are reloaded from offsets THREAD_SCR0..
+ *     THREAD_SCR3 and the eflags value from THREAD_EFLAGS.
+ *
+ * Clobbers t1. The loads are not wrapped in EX.
+ */
+SYM_FUNC_START(_restore_lbt)
+       ldptr.d         t1, a0, THREAD_SCR0     # restore scr
+       movgr2scr       $scr0, t1
+       ldptr.d         t1, a0, THREAD_SCR1
+       movgr2scr       $scr1, t1
+       ldptr.d         t1, a0, THREAD_SCR2
+       movgr2scr       $scr2, t1
+       ldptr.d         t1, a0, THREAD_SCR3
+       movgr2scr       $scr3, t1
+
+       ldptr.d         t1, a0, THREAD_EFLAGS   # restore eflags
+       x86mtflag       t1, 0x3f
+       jr              ra
+SYM_FUNC_END(_restore_lbt)
+EXPORT_SYMBOL(_restore_lbt)
+
+/*
+ * Load scr/eflag with zero.
+ *
+ * Takes no arguments and touches no memory: $scr0..$scr3 and the flags
+ * selected by mask 0x3f are written from the zero register.
+ */
+SYM_FUNC_START(_init_lbt)
+       movgr2scr       $scr0, zero
+       movgr2scr       $scr1, zero
+       movgr2scr       $scr2, zero
+       movgr2scr       $scr3, zero
+
+       x86mtflag       zero, 0x3f
+       jr              ra
+SYM_FUNC_END(_init_lbt)
+
+/*
+ * Store the four scratch registers and the eflags value to memory.
+ *
+ * a0: scr   - destination for $scr0..$scr3, SCR_REG_WIDTH bytes each
+ * a1: eflag - destination for the eflags value, stored as a 32-bit word
+ *
+ * Every store is wrapped in EX, so a faulting access returns -EFAULT
+ * through .L_lbt_fault; otherwise 0 is returned in a0. Clobbers t1.
+ */
+SYM_FUNC_START(_save_lbt_context)
+       movscr2gr       t1, $scr0               # save scr
+       EX      st.d    t1, a0, (0 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr1
+       EX      st.d    t1, a0, (1 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr2
+       EX      st.d    t1, a0, (2 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr3
+       EX      st.d    t1, a0, (3 * SCR_REG_WIDTH)
+
+       x86mfflag       t1, 0x3f                # save eflags
+       EX      st.w    t1, a1, 0
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_save_lbt_context)
+
+/*
+ * Load the four scratch registers and the eflags value from memory.
+ *
+ * a0: scr   - source for $scr0..$scr3, SCR_REG_WIDTH bytes each
+ * a1: eflag - source for the eflags value, read as a 32-bit word
+ *
+ * Every load is wrapped in EX, so a faulting access returns -EFAULT
+ * through .L_lbt_fault; otherwise 0 is returned in a0. Registers restored
+ * before a fault keep their new values. Clobbers t1.
+ */
+SYM_FUNC_START(_restore_lbt_context)
+       EX      ld.d    t1, a0, (0 * SCR_REG_WIDTH)     # restore scr
+       movgr2scr       $scr0, t1
+       EX      ld.d    t1, a0, (1 * SCR_REG_WIDTH)
+       movgr2scr       $scr1, t1
+       EX      ld.d    t1, a0, (2 * SCR_REG_WIDTH)
+       movgr2scr       $scr2, t1
+       EX      ld.d    t1, a0, (3 * SCR_REG_WIDTH)
+       movgr2scr       $scr3, t1
+
+       EX      ld.w    t1, a1, 0                       # restore eflags
+       x86mtflag       t1, 0x3f
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_restore_lbt_context)
+
+/*
+ * Read the ftop value with x86mftop and store it as a 32-bit word.
+ *
+ * a0: ftop - destination; the C prototype declares it a __user pointer
+ *
+ * The store is wrapped in EX like the other user-memory accesses in this
+ * file, so a faulting access returns -EFAULT through .L_lbt_fault instead
+ * of faulting in kernel mode with no fixup. Returns 0 in a0 on success.
+ * Clobbers t1.
+ */
+SYM_FUNC_START(_save_ftop_context)
+       x86mftop        t1
+       EX      st.w    t1, a0, 0
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_save_ftop_context)
+
+/*
+ * Load a 32-bit ftop value from memory and apply it with x86mttop.
+ *
+ * a0: ftop - source; the C prototype declares it a __user pointer
+ *
+ * x86mttop is only used with an immediate operand here, so the loaded
+ * value is masked to 0..7 and used to index a table of 8-byte entries
+ * ("x86mttop N" followed by "b 2f"); the last entry falls through to 2.
+ *
+ * The load is wrapped in EX like the other user-memory accesses in this
+ * file, so a faulting access returns -EFAULT through .L_lbt_fault instead
+ * of faulting in kernel mode with no fixup. Returns 0 in a0 on success.
+ * Clobbers t1 and a0.
+ */
+SYM_FUNC_START(_restore_ftop_context)
+       EX      ld.w    t1, a0, 0
+       andi            t1, t1, 0x7
+       la.pcrel        a0, 1f
+       alsl.d          a0, t1, a0, 3           # a0 = table base + index * 8
+       jr              a0
+1:
+       x86mttop        0
+       b       2f
+       x86mttop        1
+       b       2f
+       x86mttop        2
+       b       2f
+       x86mttop        3
+       b       2f
+       x86mttop        4
+       b       2f
+       x86mttop        5
+       b       2f
+       x86mttop        6
+       b       2f
+       x86mttop        7
+2:
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_restore_ftop_context)
+
+/*
+ * Common fixup target for every EX-wrapped access in this file: return
+ * -EFAULT to the caller of the routine that faulted.
+ */
+.L_lbt_fault:
+       li.w            a0, -EFAULT             # failure
+       jr              ra
index 7086658..c7d33c4 100644 (file)
@@ -67,39 +67,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 
 void __init pcpu_populate_pte(unsigned long addr)
 {
-       pgd_t *pgd = pgd_offset_k(addr);
-       p4d_t *p4d = p4d_offset(pgd, addr);
-       pud_t *pud;
-       pmd_t *pmd;
-
-       if (p4d_none(*p4d)) {
-               pud_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pgd_populate(&init_mm, pgd, new);
-#ifndef __PAGETABLE_PUD_FOLDED
-               pud_init(new);
-#endif
-       }
-
-       pud = pud_offset(p4d, addr);
-       if (pud_none(*pud)) {
-               pmd_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pud_populate(&init_mm, pud, new);
-#ifndef __PAGETABLE_PMD_FOLDED
-               pmd_init(new);
-#endif
-       }
-
-       pmd = pmd_offset(pud, addr);
-       if (!pmd_present(*pmd)) {
-               pte_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pmd_populate_kernel(&init_mm, pmd, new);
-       }
+       populate_kernel_pte(addr);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -470,7 +438,6 @@ void __init mem_init(void)
 {
        high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
        memblock_free_all();
-       setup_zero_pages();     /* This comes from node 0 */
 }
 
 int pcibus_to_node(struct pci_bus *bus)
index ba457e4..3cb082e 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/elf.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
@@ -82,9 +83,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
        euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
        regs->csr_euen = euen;
        lose_fpu(0);
+       lose_lbt(0);
 
        clear_thread_flag(TIF_LSX_CTX_LIVE);
        clear_thread_flag(TIF_LASX_CTX_LIVE);
+       clear_thread_flag(TIF_LBT_CTX_LIVE);
        clear_used_math();
        regs->csr_era = pc;
        regs->regs[3] = sp;
@@ -121,10 +124,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
        preempt_enable();
 
-       if (used_math())
-               memcpy(dst, src, sizeof(struct task_struct));
-       else
+       if (!used_math())
                memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
+       else
+               memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0));
+
+#ifdef CONFIG_CPU_HAS_LBT
+       memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt));
+#endif
 
        return 0;
 }
@@ -189,8 +196,10 @@ out:
        ptrace_hw_copy_thread(p);
        clear_tsk_thread_flag(p, TIF_USEDFPU);
        clear_tsk_thread_flag(p, TIF_USEDSIMD);
+       clear_tsk_thread_flag(p, TIF_USEDLBT);
        clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
        clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+       clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE);
 
        return 0;
 }
index f72adbf..c114c5e 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/loongarch.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -338,6 +339,46 @@ static int simd_set(struct task_struct *target,
 
 #endif /* CONFIG_CPU_HAS_LSX */
 
+#ifdef CONFIG_CPU_HAS_LBT
+/*
+ * regset_get handler for the LBT regset: copy the target thread's saved
+ * LBT state into the membuf in this order - scr0, scr1, scr2, scr3 (each
+ * at its full field size), then eflags and fpu.ftop as 32-bit values.
+ *
+ * Returns the value of the final membuf_write() call; the results of the
+ * earlier calls are overwritten.
+ */
+static int lbt_get(struct task_struct *target,
+                  const struct user_regset *regset,
+                  struct membuf to)
+{
+       int r;
+
+       r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0));
+       r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1));
+       r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2));
+       r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3));
+       r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32));
+       r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32));
+
+       return r;
+}
+
+/*
+ * set handler for the LBT regset: copy caller-supplied data into the
+ * target thread's saved LBT state.
+ *
+ * Byte ranges of the regset image:
+ *   [0, 4 * sizeof(scr0))               -> written starting at lbt.scr0
+ *   [eflags_start, ftop_start)          -> lbt.eflags
+ *   [ftop_start, ftop_start + 4)        -> fpu.ftop
+ *
+ * The results of the three user_regset_copyin() calls are OR-ed together
+ * and returned.
+ */
+static int lbt_set(struct task_struct *target,
+                  const struct user_regset *regset,
+                  unsigned int pos, unsigned int count,
+                  const void *kbuf, const void __user *ubuf)
+{
+       int err = 0;
+       const int eflags_start = 4 * sizeof(target->thread.lbt.scr0);
+       const int ftop_start = eflags_start + sizeof(u32);
+
+       /*
+        * NOTE(review): a single copy covers scr0..scr3, which assumes the
+        * four fields are laid out contiguously in thread.lbt - confirm.
+        */
+       err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.lbt.scr0,
+                                 0, 4 * sizeof(target->thread.lbt.scr0));
+       err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.lbt.eflags,
+                                 eflags_start, ftop_start);
+       err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.fpu.ftop,
+                                 ftop_start, ftop_start + sizeof(u32));
+
+       return err;
+}
+#endif /* CONFIG_CPU_HAS_LBT */
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 /*
@@ -802,6 +843,9 @@ enum loongarch_regset {
 #ifdef CONFIG_CPU_HAS_LASX
        REGSET_LASX,
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+       REGSET_LBT,
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        REGSET_HW_BREAK,
        REGSET_HW_WATCH,
@@ -853,6 +897,16 @@ static const struct user_regset loongarch64_regsets[] = {
                .set            = simd_set,
        },
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+       [REGSET_LBT] = {
+               .core_note_type = NT_LOONGARCH_LBT,
+               .n              = 5,
+               .size           = sizeof(u64),
+               .align          = sizeof(u64),
+               .regset_get     = lbt_get,
+               .set            = lbt_set,
+       },
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        [REGSET_HW_BREAK] = {
                .core_note_type = NT_LOONGARCH_HW_BREAK,
index 01f94d1..6c3eff9 100644 (file)
@@ -157,12 +157,11 @@ static inline void __init update_reloc_offset(unsigned long *addr, long random_o
        *new_addr = (unsigned long)reloc_offset;
 }
 
-void * __init relocate_kernel(void)
+unsigned long __init relocate_kernel(void)
 {
        unsigned long kernel_length;
        unsigned long random_offset = 0;
        void *location_new = _text; /* Default to original kernel start */
-       void *kernel_entry = start_kernel; /* Default to original kernel entry point */
        char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
 
        strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
@@ -190,9 +189,6 @@ void * __init relocate_kernel(void)
 
                reloc_offset += random_offset;
 
-               /* Return the new kernel's entry point */
-               kernel_entry = RELOCATED_KASLR(start_kernel);
-
                /* The current thread is now within the relocated kernel */
                __current_thread_info = RELOCATED_KASLR(__current_thread_info);
 
@@ -204,7 +200,7 @@ void * __init relocate_kernel(void)
 
        relocate_absolute(random_offset);
 
-       return kernel_entry;
+       return random_offset;
 }
 
 /*
index 9d830ab..7783f0a 100644 (file)
@@ -626,4 +626,8 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        paging_init();
+
+#ifdef CONFIG_KASAN
+       kasan_init();
+#endif
 }
index ceb8993..504fdfe 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/ucontext.h>
 #include <asm/vdso.h>
 
@@ -44,6 +45,9 @@
 /* Make sure we will not lose FPU ownership */
 #define lock_fpu_owner()       ({ preempt_disable(); pagefault_disable(); })
 #define unlock_fpu_owner()     ({ pagefault_enable(); preempt_enable(); })
+/* Make sure we will not lose LBT ownership */
+#define lock_lbt_owner()       ({ preempt_disable(); pagefault_disable(); })
+#define unlock_lbt_owner()     ({ pagefault_enable(); preempt_enable(); })
 
 /* Assembly functions to move context to/from the FPU */
 extern asmlinkage int
@@ -59,6 +63,13 @@ _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 extern asmlinkage int
 _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 
+#ifdef CONFIG_CPU_HAS_LBT
+extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _save_ftop_context(void __user *ftop);
+extern asmlinkage int _restore_ftop_context(void __user *ftop);
+#endif
+
 struct rt_sigframe {
        struct siginfo rs_info;
        struct ucontext rs_uctx;
@@ -75,6 +86,7 @@ struct extctx_layout {
        struct _ctx_layout fpu;
        struct _ctx_layout lsx;
        struct _ctx_layout lasx;
+       struct _ctx_layout lbt;
        struct _ctx_layout end;
 };
 
@@ -215,6 +227,52 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
        return err;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+/*
+ * Copy the LBT state saved in current->thread.lbt (scr0..scr3 and eflags)
+ * into the user-space lbt_context at ctx: scrN goes to regs[N], eflags to
+ * ctx->eflags. Returns the OR of the __put_user() results.
+ */
+static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx)
+{
+       int err = 0;
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       err |= __put_user(current->thread.lbt.scr0, &regs[0]);
+       err |= __put_user(current->thread.lbt.scr1, &regs[1]);
+       err |= __put_user(current->thread.lbt.scr2, &regs[2]);
+       err |= __put_user(current->thread.lbt.scr3, &regs[3]);
+       err |= __put_user(current->thread.lbt.eflags, eflags);
+
+       return err;
+}
+
+/*
+ * Load current->thread.lbt (scr0..scr3 and eflags) from the user-space
+ * lbt_context at ctx: regs[N] goes to scrN, ctx->eflags to eflags.
+ * Returns the OR of the __get_user() results.
+ */
+static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx)
+{
+       int err = 0;
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       err |= __get_user(current->thread.lbt.scr0, &regs[0]);
+       err |= __get_user(current->thread.lbt.scr1, &regs[1]);
+       err |= __get_user(current->thread.lbt.scr2, &regs[2]);
+       err |= __get_user(current->thread.lbt.scr3, &regs[3]);
+       err |= __get_user(current->thread.lbt.eflags, eflags);
+
+       return err;
+}
+
+/*
+ * Copy the ftop value saved in current->thread.fpu into ctx->ftop in user
+ * space. Returns the __put_user() result.
+ */
+static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx)
+{
+       uint32_t  __user *ftop  = &ctx->ftop;
+
+       return __put_user(current->thread.fpu.ftop, ftop);
+}
+
+/*
+ * Load current->thread.fpu.ftop from ctx->ftop in user space. Returns the
+ * __get_user() result.
+ */
+static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx)
+{
+       uint32_t  __user *ftop  = &ctx->ftop;
+
+       return __get_user(current->thread.fpu.ftop, ftop);
+}
+#endif
+
 /*
  * Wrappers for the assembly _{save,restore}_fp_context functions.
  */
@@ -272,6 +330,41 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx)
        return _restore_lasx_context(regs, fcc, fcsr);
 }
 
+/*
+ * Wrappers for the assembly _{save,restore}_lbt_context functions.
+ */
+#ifdef CONFIG_CPU_HAS_LBT
+/*
+ * Hand the user-space addresses of ctx->regs and ctx->eflags to the
+ * assembly routine _save_lbt_context() and return its result.
+ */
+static int save_hw_lbt_context(struct lbt_context __user *ctx)
+{
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       return _save_lbt_context(regs, eflags);
+}
+
+/*
+ * Hand the user-space addresses of ctx->regs and ctx->eflags to the
+ * assembly routine _restore_lbt_context() and return its result.
+ */
+static int restore_hw_lbt_context(struct lbt_context __user *ctx)
+{
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       return _restore_lbt_context(regs, eflags);
+}
+
+/*
+ * Hand the user-space address of ctx->ftop to the assembly routine
+ * _save_ftop_context() and return its result.
+ */
+static int save_hw_ftop_context(struct lbt_context __user *ctx)
+{
+       uint32_t __user *ftop   = &ctx->ftop;
+
+       return _save_ftop_context(ftop);
+}
+
+/*
+ * Hand the user-space address of ctx->ftop to the assembly routine
+ * _restore_ftop_context() and return its result.
+ */
+static int restore_hw_ftop_context(struct lbt_context __user *ctx)
+{
+       uint32_t __user *ftop   = &ctx->ftop;
+
+       return _restore_ftop_context(ftop);
+}
+#endif
+
 static int fcsr_pending(unsigned int __user *fcsr)
 {
        int err, sig = 0;
@@ -519,6 +612,77 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx)
        return err ?: sig;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+/*
+ * Write the current task's LBT state, plus its sctx_info header, into the
+ * LBT record of the signal frame described by extctx.
+ *
+ * With preemption and page faults disabled (lock_lbt_owner), scr/eflags
+ * are taken from the hardware if the task is the LBT owner and from
+ * current->thread.lbt otherwise; ftop is taken from the hardware if the
+ * task is the FPU owner and from current->thread.fpu.ftop otherwise.
+ *
+ * If any step reported an error, the record is touched with __put_user()
+ * after page faults have been re-enabled, and the whole sequence is
+ * retried. An error from that touch ends the loop.
+ *
+ * Returns 0 on success, otherwise the error from the touch step.
+ */
+static int protected_save_lbt_context(struct extctx_layout *extctx)
+{
+       int err = 0;
+       struct sctx_info __user *info = extctx->lbt.addr;
+       struct lbt_context __user *lbt_ctx =
+               (struct lbt_context *)get_ctx_through_ctxinfo(info);
+       uint64_t __user *regs   = (uint64_t *)&lbt_ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+       while (1) {
+               lock_lbt_owner();
+               if (is_lbt_owner())
+                       err |= save_hw_lbt_context(lbt_ctx);
+               else
+                       err |= copy_lbt_to_sigcontext(lbt_ctx);
+               if (is_fpu_owner())
+                       err |= save_hw_ftop_context(lbt_ctx);
+               else
+                       err |= copy_ftop_to_sigcontext(lbt_ctx);
+               unlock_lbt_owner();
+
+               /* Record header: magic and size of this context record. */
+               err |= __put_user(LBT_CTX_MAGIC, &info->magic);
+               err |= __put_user(extctx->lbt.size, &info->size);
+
+               if (likely(!err))
+                       break;
+               /* Touch the LBT context and try again */
+               err = __put_user(0, &regs[0]) | __put_user(0, eflags);
+
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+
+/*
+ * Reload the current task's LBT state from the LBT record of the signal
+ * frame described by extctx.
+ *
+ * With preemption and page faults disabled (lock_lbt_owner), scr/eflags
+ * are loaded into the hardware if the task is the LBT owner and into
+ * current->thread.lbt otherwise; ftop is loaded into the hardware if the
+ * task is the FPU owner and into current->thread.fpu.ftop otherwise.
+ *
+ * If any step reported an error, the record is touched with __get_user()
+ * after page faults have been re-enabled, and the whole sequence is
+ * retried. An error from that touch ends the loop.
+ *
+ * Returns 0 on success, otherwise the error from the touch step.
+ */
+static int protected_restore_lbt_context(struct extctx_layout *extctx)
+{
+       int err = 0, tmp __maybe_unused;
+       struct sctx_info __user *info = extctx->lbt.addr;
+       struct lbt_context __user *lbt_ctx =
+               (struct lbt_context *)get_ctx_through_ctxinfo(info);
+       uint64_t __user *regs   = (uint64_t *)&lbt_ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+       while (1) {
+               lock_lbt_owner();
+               if (is_lbt_owner())
+                       err |= restore_hw_lbt_context(lbt_ctx);
+               else
+                       err |= copy_lbt_from_sigcontext(lbt_ctx);
+               if (is_fpu_owner())
+                       err |= restore_hw_ftop_context(lbt_ctx);
+               else
+                       err |= copy_ftop_from_sigcontext(lbt_ctx);
+               unlock_lbt_owner();
+
+               if (likely(!err))
+                       break;
+               /* Touch the LBT context and try again */
+               err = __get_user(tmp, &regs[0]) | __get_user(tmp, eflags);
+
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+#endif
+
 static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
                            struct extctx_layout *extctx)
 {
@@ -539,6 +703,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
        else if (extctx->fpu.addr)
                err |= protected_save_fpu_context(extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (extctx->lbt.addr)
+               err |= protected_save_lbt_context(extctx);
+#endif
+
        /* Set the "end" magic */
        info = (struct sctx_info *)extctx->end.addr;
        err |= __put_user(0, &info->magic);
@@ -584,6 +753,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
                        extctx->lasx.addr = info;
                        break;
 
+               case LBT_CTX_MAGIC:
+                       if (size < (sizeof(struct sctx_info) +
+                                   sizeof(struct lbt_context)))
+                               goto invalid;
+                       extctx->lbt.addr = info;
+                       break;
+
                default:
                        goto invalid;
                }
@@ -636,6 +812,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
        else if (extctx.fpu.addr)
                err |= protected_restore_fpu_context(&extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (extctx.lbt.addr)
+               err |= protected_restore_lbt_context(&extctx);
+#endif
+
 bad:
        return err;
 }
@@ -700,6 +881,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
                          sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
        }
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (cpu_has_lbt && thread_lbt_context_live()) {
+               new_sp = extframe_alloc(extctx, &extctx->lbt,
+                         sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp);
+       }
+#endif
+
        return new_sp;
 }
 
index 2463d2f..92270f1 100644 (file)
@@ -18,17 +18,19 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
        struct pt_regs dummyregs;
        struct unwind_state state;
 
-       regs = &dummyregs;
+       if (!regs) {
+               regs = &dummyregs;
 
-       if (task == current) {
-               regs->regs[3] = (unsigned long)__builtin_frame_address(0);
-               regs->csr_era = (unsigned long)__builtin_return_address(0);
-       } else {
-               regs->regs[3] = thread_saved_fp(task);
-               regs->csr_era = thread_saved_ra(task);
+               if (task == current) {
+                       regs->regs[3] = (unsigned long)__builtin_frame_address(0);
+                       regs->csr_era = (unsigned long)__builtin_return_address(0);
+               } else {
+                       regs->regs[3] = thread_saved_fp(task);
+                       regs->csr_era = thread_saved_ra(task);
+               }
+               regs->regs[1] = 0;
        }
 
-       regs->regs[1] = 0;
        for (unwind_start(&state, task, regs);
             !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
                addr = unwind_get_return_address(&state);
index 89699db..6521477 100644 (file)
@@ -36,7 +36,9 @@
 #include <asm/break.h>
 #include <asm/cpu.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/inst.h>
+#include <asm/kgdb.h>
 #include <asm/loongarch.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -702,6 +704,11 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
         * pertain to them.
         */
        switch (bcode) {
+       case BRK_KDB:
+               if (kgdb_breakpoint_handler(regs))
+                       goto out;
+               else
+                       break;
        case BRK_KPROBE_BP:
                if (kprobe_breakpoint_handler(regs))
                        goto out;
@@ -768,6 +775,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs)
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
        pr_warn("Hardware watch point handler not implemented!\n");
 #else
+       if (kgdb_breakpoint_handler(regs))
+               goto out;
+
        if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
                int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
                unsigned long pc = instruction_pointer(regs);
@@ -966,13 +976,47 @@ out:
        irqentry_exit(regs, state);
 }
 
+/*
+ * Give the current thread a usable LBT context.
+ *
+ * On first use (no live LBT context) the LBT state is initialised with
+ * init_lbt() and TIF_LBT_CTX_LIVE is set. Otherwise, if the thread is not
+ * the LBT owner, ownership is taken with own_lbt_inatomic(1).
+ * NOTE(review): the argument 1 presumably asks for the saved context to be
+ * restored - confirm against own_lbt_inatomic().
+ *
+ * LBT must be enabled when this returns; anything else is a BUG.
+ */
+static void init_restore_lbt(void)
+{
+       if (!thread_lbt_context_live()) {
+               /* First time LBT context user */
+               init_lbt();
+               set_thread_flag(TIF_LBT_CTX_LIVE);
+       } else {
+               if (!is_lbt_owner())
+                       own_lbt_inatomic(1);
+       }
+
+       BUG_ON(!is_lbt_enabled());
+}
+
 asmlinkage void noinstr do_lbt(struct pt_regs *regs)
 {
        irqentry_state_t state = irqentry_enter(regs);
 
-       local_irq_enable();
-       force_sig(SIGILL);
-       local_irq_disable();
+       /*
+        * BTD (Binary Translation Disable exception) can be triggered
+        * during FP save/restore if TM (Top Mode) is on, which may
+        * cause irq_enable during 'switch_to'. To avoid this situation
+        * (including the user using 'MOVGR2GCSR' to turn on TM, which
+        * will not trigger the BTE), we need to check PRMD first.
+        */
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_enable();
+
+       if (!cpu_has_lbt) {
+               force_sig(SIGILL);
+               goto out;
+       }
+       BUG_ON(is_lbt_enabled());
+
+       preempt_disable();
+       init_restore_lbt();
+       preempt_enable();
+
+out:
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_disable();
 
        irqentry_exit(regs, state);
 }
index d60d4e0..a77bf16 100644 (file)
@@ -6,4 +6,6 @@
 lib-y  += delay.o memset.o memcpy.o memmove.o \
           clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
 
+obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
+
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
index 0790ead..be74154 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-       sub.d   a0, a2, a0
-       addi.d  a0, a0, (\to) * (-8)
-       jr      ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-       addi.d  a0, a1, -\to
-       jr      ra
-.endr
-
 SYM_FUNC_START(__clear_user)
        /*
         * Some CPUs support hardware unaligned access
@@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic)
 2:     move    a0, a1
        jr      ra
 
-       _asm_extable 1b, .L_fixup_handle_s0
+       _asm_extable 1b, 2b
 SYM_FUNC_END(__clear_user_generic)
 
 /*
@@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast)
        jr      ra
 
        /* fixup and ex_table */
-       _asm_extable 0b, .L_fixup_handle_0
-       _asm_extable 1b, .L_fixup_handle_0
-       _asm_extable 2b, .L_fixup_handle_1
-       _asm_extable 3b, .L_fixup_handle_2
-       _asm_extable 4b, .L_fixup_handle_3
-       _asm_extable 5b, .L_fixup_handle_4
-       _asm_extable 6b, .L_fixup_handle_5
-       _asm_extable 7b, .L_fixup_handle_6
-       _asm_extable 8b, .L_fixup_handle_7
-       _asm_extable 9b, .L_fixup_handle_0
-       _asm_extable 10b, .L_fixup_handle_1
-       _asm_extable 11b, .L_fixup_handle_2
-       _asm_extable 12b, .L_fixup_handle_3
-       _asm_extable 13b, .L_fixup_handle_0
-       _asm_extable 14b, .L_fixup_handle_1
-       _asm_extable 15b, .L_fixup_handle_0
-       _asm_extable 16b, .L_fixup_handle_0
-       _asm_extable 17b, .L_fixup_handle_s0
-       _asm_extable 18b, .L_fixup_handle_s0
-       _asm_extable 19b, .L_fixup_handle_s0
-       _asm_extable 20b, .L_fixup_handle_s2
-       _asm_extable 21b, .L_fixup_handle_s0
-       _asm_extable 22b, .L_fixup_handle_s0
-       _asm_extable 23b, .L_fixup_handle_s4
-       _asm_extable 24b, .L_fixup_handle_s0
-       _asm_extable 25b, .L_fixup_handle_s4
-       _asm_extable 26b, .L_fixup_handle_s0
-       _asm_extable 27b, .L_fixup_handle_s4
-       _asm_extable 28b, .L_fixup_handle_s0
+.Llarge_fixup:
+       sub.d   a1, a2, a0
+
+.Lsmall_fixup:
+29:    st.b    zero, a0, 0
+       addi.d  a0, a0, 1
+       addi.d  a1, a1, -1
+       bgt     a1, zero, 29b
+
+.Lexit:
+       move    a0, a1
+       jr      ra
+
+       _asm_extable 0b, .Lsmall_fixup
+       _asm_extable 1b, .Llarge_fixup
+       _asm_extable 2b, .Llarge_fixup
+       _asm_extable 3b, .Llarge_fixup
+       _asm_extable 4b, .Llarge_fixup
+       _asm_extable 5b, .Llarge_fixup
+       _asm_extable 6b, .Llarge_fixup
+       _asm_extable 7b, .Llarge_fixup
+       _asm_extable 8b, .Llarge_fixup
+       _asm_extable 9b, .Llarge_fixup
+       _asm_extable 10b, .Llarge_fixup
+       _asm_extable 11b, .Llarge_fixup
+       _asm_extable 12b, .Llarge_fixup
+       _asm_extable 13b, .Llarge_fixup
+       _asm_extable 14b, .Llarge_fixup
+       _asm_extable 15b, .Llarge_fixup
+       _asm_extable 16b, .Llarge_fixup
+       _asm_extable 17b, .Lexit
+       _asm_extable 18b, .Lsmall_fixup
+       _asm_extable 19b, .Lsmall_fixup
+       _asm_extable 20b, .Lsmall_fixup
+       _asm_extable 21b, .Lsmall_fixup
+       _asm_extable 22b, .Lsmall_fixup
+       _asm_extable 23b, .Lsmall_fixup
+       _asm_extable 24b, .Lsmall_fixup
+       _asm_extable 25b, .Lsmall_fixup
+       _asm_extable 26b, .Lsmall_fixup
+       _asm_extable 27b, .Lsmall_fixup
+       _asm_extable 28b, .Lsmall_fixup
+       _asm_extable 29b, .Lexit
 SYM_FUNC_END(__clear_user_fast)
index bfe3d27..feec3d3 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-       sub.d   a0, a2, a0
-       addi.d  a0, a0, (\to) * (-8)
-       jr      ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-       addi.d  a0, a2, -\to
-       jr      ra
-.endr
-
 SYM_FUNC_START(__copy_user)
        /*
         * Some CPUs support hardware unaligned access
@@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic)
 3:     move    a0, a2
        jr      ra
 
-       _asm_extable 1b, .L_fixup_handle_s0
-       _asm_extable 2b, .L_fixup_handle_s0
+       _asm_extable 1b, 3b
+       _asm_extable 2b, 3b
 SYM_FUNC_END(__copy_user_generic)
 
 /*
@@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast)
        sltui   t0, a2, 9
        bnez    t0, .Lsmall
 
-       add.d   a3, a1, a2
-       add.d   a2, a0, a2
 0:     ld.d    t0, a1, 0
 1:     st.d    t0, a0, 0
+       add.d   a3, a1, a2
+       add.d   a2, a0, a2
 
        /* align up destination address */
        andi    t1, a0, 7
@@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast)
 7:     ld.d    t5, a1, 40
 8:     ld.d    t6, a1, 48
 9:     ld.d    t7, a1, 56
-       addi.d  a1, a1, 64
 10:    st.d    t0, a0, 0
 11:    st.d    t1, a0, 8
 12:    st.d    t2, a0, 16
@@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast)
 15:    st.d    t5, a0, 40
 16:    st.d    t6, a0, 48
 17:    st.d    t7, a0, 56
+       addi.d  a1, a1, 64
        addi.d  a0, a0, 64
        bltu    a1, a4, .Lloop64
 
@@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast)
 19:    ld.d    t1, a1, 8
 20:    ld.d    t2, a1, 16
 21:    ld.d    t3, a1, 24
-       addi.d  a1, a1, 32
 22:    st.d    t0, a0, 0
 23:    st.d    t1, a0, 8
 24:    st.d    t2, a0, 16
 25:    st.d    t3, a0, 24
+       addi.d  a1, a1, 32
        addi.d  a0, a0, 32
 
 .Llt32:
@@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast)
        bgeu    a1, a4, .Llt16
 26:    ld.d    t0, a1, 0
 27:    ld.d    t1, a1, 8
-       addi.d  a1, a1, 16
 28:    st.d    t0, a0, 0
 29:    st.d    t1, a0, 8
+       addi.d  a1, a1, 16
        addi.d  a0, a0, 16
 
 .Llt16:
@@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast)
        bgeu    a1, a4, .Llt8
 30:    ld.d    t0, a1, 0
 31:    st.d    t0, a0, 0
+       addi.d  a1, a1, 8
        addi.d  a0, a0, 8
 
 .Llt8:
@@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast)
        jr      ra
 
        /* fixup and ex_table */
-       _asm_extable 0b, .L_fixup_handle_0
-       _asm_extable 1b, .L_fixup_handle_0
-       _asm_extable 2b, .L_fixup_handle_0
-       _asm_extable 3b, .L_fixup_handle_0
-       _asm_extable 4b, .L_fixup_handle_0
-       _asm_extable 5b, .L_fixup_handle_0
-       _asm_extable 6b, .L_fixup_handle_0
-       _asm_extable 7b, .L_fixup_handle_0
-       _asm_extable 8b, .L_fixup_handle_0
-       _asm_extable 9b, .L_fixup_handle_0
-       _asm_extable 10b, .L_fixup_handle_0
-       _asm_extable 11b, .L_fixup_handle_1
-       _asm_extable 12b, .L_fixup_handle_2
-       _asm_extable 13b, .L_fixup_handle_3
-       _asm_extable 14b, .L_fixup_handle_4
-       _asm_extable 15b, .L_fixup_handle_5
-       _asm_extable 16b, .L_fixup_handle_6
-       _asm_extable 17b, .L_fixup_handle_7
-       _asm_extable 18b, .L_fixup_handle_0
-       _asm_extable 19b, .L_fixup_handle_0
-       _asm_extable 20b, .L_fixup_handle_0
-       _asm_extable 21b, .L_fixup_handle_0
-       _asm_extable 22b, .L_fixup_handle_0
-       _asm_extable 23b, .L_fixup_handle_1
-       _asm_extable 24b, .L_fixup_handle_2
-       _asm_extable 25b, .L_fixup_handle_3
-       _asm_extable 26b, .L_fixup_handle_0
-       _asm_extable 27b, .L_fixup_handle_0
-       _asm_extable 28b, .L_fixup_handle_0
-       _asm_extable 29b, .L_fixup_handle_1
-       _asm_extable 30b, .L_fixup_handle_0
-       _asm_extable 31b, .L_fixup_handle_0
-       _asm_extable 32b, .L_fixup_handle_0
-       _asm_extable 33b, .L_fixup_handle_0
-       _asm_extable 34b, .L_fixup_handle_s0
-       _asm_extable 35b, .L_fixup_handle_s0
-       _asm_extable 36b, .L_fixup_handle_s0
-       _asm_extable 37b, .L_fixup_handle_s0
-       _asm_extable 38b, .L_fixup_handle_s0
-       _asm_extable 39b, .L_fixup_handle_s0
-       _asm_extable 40b, .L_fixup_handle_s0
-       _asm_extable 41b, .L_fixup_handle_s2
-       _asm_extable 42b, .L_fixup_handle_s0
-       _asm_extable 43b, .L_fixup_handle_s0
-       _asm_extable 44b, .L_fixup_handle_s0
-       _asm_extable 45b, .L_fixup_handle_s0
-       _asm_extable 46b, .L_fixup_handle_s0
-       _asm_extable 47b, .L_fixup_handle_s4
-       _asm_extable 48b, .L_fixup_handle_s0
-       _asm_extable 49b, .L_fixup_handle_s0
-       _asm_extable 50b, .L_fixup_handle_s0
-       _asm_extable 51b, .L_fixup_handle_s4
-       _asm_extable 52b, .L_fixup_handle_s0
-       _asm_extable 53b, .L_fixup_handle_s0
-       _asm_extable 54b, .L_fixup_handle_s0
-       _asm_extable 55b, .L_fixup_handle_s4
-       _asm_extable 56b, .L_fixup_handle_s0
-       _asm_extable 57b, .L_fixup_handle_s0
+.Llarge_fixup:
+       sub.d   a2, a2, a0
+
+.Lsmall_fixup:
+58:    ld.b    t0, a1, 0
+59:    st.b    t0, a0, 0
+       addi.d  a0, a0, 1
+       addi.d  a1, a1, 1
+       addi.d  a2, a2, -1
+       bgt     a2, zero, 58b
+
+.Lexit:
+       move    a0, a2
+       jr      ra
+
+       _asm_extable 0b, .Lsmall_fixup
+       _asm_extable 1b, .Lsmall_fixup
+       _asm_extable 2b, .Llarge_fixup
+       _asm_extable 3b, .Llarge_fixup
+       _asm_extable 4b, .Llarge_fixup
+       _asm_extable 5b, .Llarge_fixup
+       _asm_extable 6b, .Llarge_fixup
+       _asm_extable 7b, .Llarge_fixup
+       _asm_extable 8b, .Llarge_fixup
+       _asm_extable 9b, .Llarge_fixup
+       _asm_extable 10b, .Llarge_fixup
+       _asm_extable 11b, .Llarge_fixup
+       _asm_extable 12b, .Llarge_fixup
+       _asm_extable 13b, .Llarge_fixup
+       _asm_extable 14b, .Llarge_fixup
+       _asm_extable 15b, .Llarge_fixup
+       _asm_extable 16b, .Llarge_fixup
+       _asm_extable 17b, .Llarge_fixup
+       _asm_extable 18b, .Llarge_fixup
+       _asm_extable 19b, .Llarge_fixup
+       _asm_extable 20b, .Llarge_fixup
+       _asm_extable 21b, .Llarge_fixup
+       _asm_extable 22b, .Llarge_fixup
+       _asm_extable 23b, .Llarge_fixup
+       _asm_extable 24b, .Llarge_fixup
+       _asm_extable 25b, .Llarge_fixup
+       _asm_extable 26b, .Llarge_fixup
+       _asm_extable 27b, .Llarge_fixup
+       _asm_extable 28b, .Llarge_fixup
+       _asm_extable 29b, .Llarge_fixup
+       _asm_extable 30b, .Llarge_fixup
+       _asm_extable 31b, .Llarge_fixup
+       _asm_extable 32b, .Llarge_fixup
+       _asm_extable 33b, .Llarge_fixup
+       _asm_extable 34b, .Lexit
+       _asm_extable 35b, .Lexit
+       _asm_extable 36b, .Lsmall_fixup
+       _asm_extable 37b, .Lsmall_fixup
+       _asm_extable 38b, .Lsmall_fixup
+       _asm_extable 39b, .Lsmall_fixup
+       _asm_extable 40b, .Lsmall_fixup
+       _asm_extable 41b, .Lsmall_fixup
+       _asm_extable 42b, .Lsmall_fixup
+       _asm_extable 43b, .Lsmall_fixup
+       _asm_extable 44b, .Lsmall_fixup
+       _asm_extable 45b, .Lsmall_fixup
+       _asm_extable 46b, .Lsmall_fixup
+       _asm_extable 47b, .Lsmall_fixup
+       _asm_extable 48b, .Lsmall_fixup
+       _asm_extable 49b, .Lsmall_fixup
+       _asm_extable 50b, .Lsmall_fixup
+       _asm_extable 51b, .Lsmall_fixup
+       _asm_extable 52b, .Lsmall_fixup
+       _asm_extable 53b, .Lsmall_fixup
+       _asm_extable 54b, .Lsmall_fixup
+       _asm_extable 55b, .Lsmall_fixup
+       _asm_extable 56b, .Lsmall_fixup
+       _asm_extable 57b, .Lsmall_fixup
+       _asm_extable 58b, .Lexit
+       _asm_extable 59b, .Lexit
 SYM_FUNC_END(__copy_user_fast)
index cc30b3b..fa11488 100644 (file)
@@ -10,6 +10,8 @@
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memcpy)
        /*
         * Some CPUs support hardware unaligned access
@@ -17,9 +19,13 @@ SYM_FUNC_START(memcpy)
        ALTERNATIVE     "b __memcpy_generic", \
                        "b __memcpy_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memcpy)
-_ASM_NOKPROBE(memcpy)
+SYM_FUNC_ALIAS(__memcpy, memcpy)
 
 EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+_ASM_NOKPROBE(memcpy)
+_ASM_NOKPROBE(__memcpy)
 
 /*
  * void *__memcpy_generic(void *dst, const void *src, size_t n)
index 7dc76d1..82dae06 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memmove)
-       blt     a0, a1, memcpy  /* dst < src, memcpy */
-       blt     a1, a0, rmemcpy /* src < dst, rmemcpy */
-       jr      ra              /* dst == src, return */
+       blt     a0, a1, __memcpy        /* dst < src, memcpy */
+       blt     a1, a0, __rmemcpy       /* src < dst, rmemcpy */
+       jr      ra                      /* dst == src, return */
 SYM_FUNC_END(memmove)
-_ASM_NOKPROBE(memmove)
+SYM_FUNC_ALIAS(__memmove, memmove)
 
 EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL(__memmove)
+
+_ASM_NOKPROBE(memmove)
+_ASM_NOKPROBE(__memmove)
 
-SYM_FUNC_START(rmemcpy)
+SYM_FUNC_START(__rmemcpy)
        /*
         * Some CPUs support hardware unaligned access
         */
        ALTERNATIVE     "b __rmemcpy_generic", \
                        "b __rmemcpy_fast", CPU_FEATURE_UAL
-SYM_FUNC_END(rmemcpy)
-_ASM_NOKPROBE(rmemcpy)
+SYM_FUNC_END(__rmemcpy)
+_ASM_NOKPROBE(__rmemcpy)
 
 /*
  * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
index 3f20f79..06d3ca5 100644 (file)
@@ -16,6 +16,8 @@
        bstrins.d \r0, \r0, 63, 32
 .endm
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memset)
        /*
         * Some CPUs support hardware unaligned access
@@ -23,9 +25,13 @@ SYM_FUNC_START(memset)
        ALTERNATIVE     "b __memset_generic", \
                        "b __memset_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memset)
-_ASM_NOKPROBE(memset)
+SYM_FUNC_ALIAS(__memset, memset)
 
 EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
+
+_ASM_NOKPROBE(memset)
+_ASM_NOKPROBE(__memset)
 
 /*
  * void *__memset_generic(void *s, int c, size_t n)
diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c
new file mode 100644 (file)
index 0000000..84cd24b
--- /dev/null
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include "xor_simd.h"
+
+/*
+ * Process one cache line (64 bytes) per loop. This assumes that future
+ * popular LoongArch cores will have performance characteristics similar to
+ * the current models.
+ */
+#define LINE_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_LSX /* LSX flavour: instantiate xor_template.c with vld/vst/vxor.v on $vr registers */
+
+#define LD(reg, base, offset)  \
+       "vld $vr" #reg ", %[" #base "], " #offset "\n\t" /* asm text: load $vr<reg> from named operand %[base] at offset */
+#define ST(reg, base, offset)  \
+       "vst $vr" #reg ", %[" #base "], " #offset "\n\t" /* asm text: store $vr<reg> to named operand %[base] at offset */
+#define XOR(dj, k)     "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t" /* $vr<dj> is both destination and first source */
+
+#define LD_INOUT_LINE(base)    \
+       LD(0, base, 0)          \
+       LD(1, base, 16)         \
+       LD(2, base, 32)         \
+       LD(3, base, 48) /* four loads at 16-byte steps into $vr0-$vr3 cover one 64-byte line */
+
+#define LD_AND_XOR_LINE(base)  \
+       LD(4, base, 0)          \
+       LD(5, base, 16)         \
+       LD(6, base, 32)         \
+       LD(7, base, 48)         \
+       XOR(0, 4)               \
+       XOR(1, 5)               \
+       XOR(2, 6)               \
+       XOR(3, 7) /* line loaded into $vr4-$vr7 is folded into $vr0-$vr3 */
+
+#define ST_LINE(base)          \
+       ST(0, base, 0)          \
+       ST(1, base, 16)         \
+       ST(2, base, 32)         \
+       ST(3, base, 48) /* write $vr0-$vr3 back at the same four offsets */
+
+#define XOR_FUNC_NAME(nr) __xor_lsx_##nr /* template functions become __xor_lsx_2 .. __xor_lsx_5 */
+#include "xor_template.c" /* emits the function bodies using the macros defined above */
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX /* LASX flavour: instantiate xor_template.c with xvld/xvst/xvxor.v on $xr registers */
+
+#define LD(reg, base, offset)  \
+       "xvld $xr" #reg ", %[" #base "], " #offset "\n\t" /* asm text: load $xr<reg> from named operand %[base] at offset */
+#define ST(reg, base, offset)  \
+       "xvst $xr" #reg ", %[" #base "], " #offset "\n\t" /* asm text: store $xr<reg> to named operand %[base] at offset */
+#define XOR(dj, k)     "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t" /* $xr<dj> is both destination and first source */
+
+#define LD_INOUT_LINE(base)    \
+       LD(0, base, 0)          \
+       LD(1, base, 32) /* two loads at 32-byte steps into $xr0-$xr1 cover one 64-byte line */
+
+#define LD_AND_XOR_LINE(base)  \
+       LD(2, base, 0)          \
+       LD(3, base, 32)         \
+       XOR(0, 2)               \
+       XOR(1, 3) /* line loaded into $xr2-$xr3 is folded into $xr0-$xr1 */
+
+#define ST_LINE(base)          \
+       ST(0, base, 0)          \
+       ST(1, base, 32) /* write $xr0-$xr1 back at the same two offsets */
+
+#define XOR_FUNC_NAME(nr) __xor_lasx_##nr /* template functions become __xor_lasx_2 .. __xor_lasx_5 */
+#include "xor_template.c" /* emits the function bodies using the macros defined above */
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h
new file mode 100644 (file)
index 0000000..f50f325
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Simple interface to link xor_simd.c and xor_simd_glue.c
+ *
+ * Separating these files ensures that no SIMD instructions are run outside of
+ * the kfpu critical section.
+ */
+
+#ifndef __LOONGARCH_LIB_XOR_SIMD_H
+#define __LOONGARCH_LIB_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2);
+void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4);
+void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2);
+void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                 const unsigned long * __restrict p4);
+void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                 const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c
new file mode 100644 (file)
index 0000000..393f689
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/fpu.h>
+#include <asm/xor_simd.h>
+#include "xor_simd.h"
+
+#define MAKE_XOR_GLUE_2(flavor) /* defines and exports xor_<flavor>_2() */             \
+void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2)                      \
+{                                                                              \
+       kernel_fpu_begin(); /* worker is only called inside the begin/end pair */       \
+       __xor_##flavor##_2(bytes, p1, p2); /* declared in "xor_simd.h" */               \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_2) /* no ';' here: the macro user supplies it */
+
+#define MAKE_XOR_GLUE_3(flavor) /* defines and exports xor_<flavor>_3() */             \
+void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3)                      \
+{                                                                              \
+       kernel_fpu_begin(); /* worker is only called inside the begin/end pair */       \
+       __xor_##flavor##_3(bytes, p1, p2, p3); /* declared in "xor_simd.h" */           \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_3) /* no ';' here: the macro user supplies it */
+
+#define MAKE_XOR_GLUE_4(flavor) /* defines and exports xor_<flavor>_4() */             \
+void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3,                      \
+                     const unsigned long * __restrict p4)                      \
+{                                                                              \
+       kernel_fpu_begin(); /* worker is only called inside the begin/end pair */       \
+       __xor_##flavor##_4(bytes, p1, p2, p3, p4); /* declared in "xor_simd.h" */       \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_4) /* no ';' here: the macro user supplies it */
+
+#define MAKE_XOR_GLUE_5(flavor) /* defines and exports xor_<flavor>_5() */             \
+void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3,                      \
+                     const unsigned long * __restrict p4,                      \
+                     const unsigned long * __restrict p5)                      \
+{                                                                              \
+       kernel_fpu_begin(); /* worker is only called inside the begin/end pair */       \
+       __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); /* declared in "xor_simd.h" */   \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_5) /* no ';' here: the macro user supplies it */
+
+#define MAKE_XOR_GLUES(flavor) /* emit the 2-, 3-, 4- and 5-buffer wrappers for one flavor */ \
+       MAKE_XOR_GLUE_2(flavor);        \
+       MAKE_XOR_GLUE_3(flavor);        \
+       MAKE_XOR_GLUE_4(flavor);        \
+       MAKE_XOR_GLUE_5(flavor) /* last one has no ';': it comes from the use site below */
+
+#ifdef CONFIG_CPU_HAS_LSX
+MAKE_XOR_GLUES(lsx); /* xor_lsx_2 .. xor_lsx_5 */
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+MAKE_XOR_GLUES(lasx); /* xor_lasx_2 .. xor_lasx_5 */
+#endif
diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c
new file mode 100644 (file)
index 0000000..0358ced
--- /dev/null
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Template for XOR operations, instantiated in xor_simd.c.
+ *
+ * Expected preprocessor definitions:
+ *
+ * - LINE_WIDTH
+ * - XOR_FUNC_NAME(nr)
+ * - LD_INOUT_LINE(buf)
+ * - LD_AND_XOR_LINE(buf)
+ * - ST_LINE(buf)
+ */
+
+void XOR_FUNC_NAME(2)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2)
+{ /* XOR v2 into v1 in place, one LINE_WIDTH-byte line per loop iteration */
+       unsigned long lines = bytes / LINE_WIDTH; /* whole lines only; a remainder of bytes is not processed */
+
+       do { /* body executes once before the count is tested */
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1) /* load the current v1 line */
+                       LD_AND_XOR_LINE(v2) /* load the v2 line and XOR it in */
+                       ST_LINE(v1) /* store the result back to v1 */
+               : : [v1] "r"(v1), [v2] "r"(v2) : "memory" /* no outputs; only "memory" is listed as clobbered */
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long); /* advance by one line, counted in unsigned longs */
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0); /* NOTE(review): if bytes < LINE_WIDTH, lines starts at 0 and --lines wraps; presumably callers never pass that - confirm */
+}
+
+void XOR_FUNC_NAME(3)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3)
+{ /* XOR v2 and v3 into v1 in place, one LINE_WIDTH-byte line per loop iteration */
+       unsigned long lines = bytes / LINE_WIDTH; /* whole lines only; a remainder of bytes is not processed */
+
+       do { /* body executes once before the count is tested */
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1) /* load the current v1 line */
+                       LD_AND_XOR_LINE(v2) /* each source line is loaded and XORed in turn */
+                       LD_AND_XOR_LINE(v3)
+                       ST_LINE(v1) /* store the result back to v1 */
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory" /* no outputs; only "memory" is listed as clobbered */
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long); /* advance by one line, counted in unsigned longs */
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0); /* NOTE(review): if bytes < LINE_WIDTH, lines starts at 0 and --lines wraps; presumably callers never pass that - confirm */
+}
+
+void XOR_FUNC_NAME(4)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3,
+                     const unsigned long * __restrict v4)
+{ /* XOR v2, v3 and v4 into v1 in place, one LINE_WIDTH-byte line per loop iteration */
+       unsigned long lines = bytes / LINE_WIDTH; /* whole lines only; a remainder of bytes is not processed */
+
+       do { /* body executes once before the count is tested */
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1) /* load the current v1 line */
+                       LD_AND_XOR_LINE(v2) /* each source line is loaded and XORed in turn */
+                       LD_AND_XOR_LINE(v3)
+                       LD_AND_XOR_LINE(v4)
+                       ST_LINE(v1) /* store the result back to v1 */
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4) /* inputs only; no output operands */
+               : "memory" /* the only clobber listed */
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long); /* advance by one line, counted in unsigned longs */
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+               v4 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0); /* NOTE(review): if bytes < LINE_WIDTH, lines starts at 0 and --lines wraps; presumably callers never pass that - confirm */
+}
+
+void XOR_FUNC_NAME(5)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3,
+                     const unsigned long * __restrict v4,
+                     const unsigned long * __restrict v5)
+{ /* XOR v2, v3, v4 and v5 into v1 in place, one LINE_WIDTH-byte line per loop iteration */
+       unsigned long lines = bytes / LINE_WIDTH; /* whole lines only; a remainder of bytes is not processed */
+
+       do { /* body executes once before the count is tested */
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1) /* load the current v1 line */
+                       LD_AND_XOR_LINE(v2) /* each source line is loaded and XORed in turn */
+                       LD_AND_XOR_LINE(v3)
+                       LD_AND_XOR_LINE(v4)
+                       LD_AND_XOR_LINE(v5)
+                       ST_LINE(v1) /* store the result back to v1 */
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4), /* inputs only; no output operands */
+                   [v5] "r"(v5) : "memory" /* the only clobber listed */
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long); /* advance by one line, counted in unsigned longs */
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+               v4 += LINE_WIDTH / sizeof(unsigned long);
+               v5 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0); /* NOTE(review): if bytes < LINE_WIDTH, lines starts at 0 and --lines wraps; presumably callers never pass that - confirm */
+}
index 8ffc638..e4d1e58 100644 (file)
@@ -7,3 +7,6 @@ obj-y                           += init.o cache.o tlb.o tlbex.o extable.o \
                                   fault.o ioremap.o maccess.o mmap.o pgtable.o page.o
 
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
+obj-$(CONFIG_KASAN)            += kasan_init.o
+
+KASAN_SANITIZE_kasan_init.o     := n
index 72685a4..6be04d3 100644 (file)
@@ -156,7 +156,6 @@ void cpu_cache_init(void)
 
        current_cpu_data.cache_leaves_present = leaf;
        current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
-       shm_align_mask = PAGE_SIZE - 1;
 }
 
 static const pgprot_t protection_map[16] = {
index da5b6d5..e6376e3 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/kprobes.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/kfence.h>
 
 #include <asm/branch.h>
 #include <asm/mmu_context.h>
@@ -30,7 +31,8 @@
 
 int show_unhandled_signals = 1;
 
-static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
+static void __kprobes no_context(struct pt_regs *regs,
+                       unsigned long write, unsigned long address)
 {
        const int field = sizeof(unsigned long) * 2;
 
@@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
        if (fixup_exception(regs))
                return;
 
+       if (kfence_handle_page_fault(address, write, regs))
+               return;
+
        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
@@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
        die("Oops", regs);
 }
 
-static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
+static void __kprobes do_out_of_memory(struct pt_regs *regs,
+                       unsigned long write, unsigned long address)
 {
        /*
         * We ran out of memory, call the OOM killer, and return the userspace
         * (which will retry the fault, or kill us if we got oom-killed).
         */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
        pagefault_out_of_memory();
@@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
 {
        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
 
@@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
 
        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
 
@@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
         */
        if (address & __UA_LIMIT) {
                if (!user_mode(regs))
-                       no_context(regs, address);
+                       no_context(regs, write, address);
                else
                        do_sigsegv(regs, write, address, si_code);
                return;
@@ -211,7 +217,7 @@ good_area:
 
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
-                       no_context(regs, address);
+                       no_context(regs, write, address);
                return;
        }
 
@@ -232,7 +238,7 @@ good_area:
        if (unlikely(fault & VM_FAULT_ERROR)) {
                mmap_read_unlock(mm);
                if (fault & VM_FAULT_OOM) {
-                       do_out_of_memory(regs, address);
+                       do_out_of_memory(regs, write, address);
                        return;
                } else if (fault & VM_FAULT_SIGSEGV) {
                        do_sigsegv(regs, write, address, si_code);
index 3b7d812..f3fe8c0 100644 (file)
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
-/*
- * We have up to 8 empty zeroed pages so we can map one of the right colour
- * when needed.         Since page is never written to after the initialization we
- * don't have to care about aliases on other CPUs.
- */
-unsigned long empty_zero_page, zero_page_mask;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(zero_page_mask);
-
-void setup_zero_pages(void)
-{
-       unsigned int order, i;
-       struct page *page;
-
-       order = 0;
-
-       empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-       if (!empty_zero_page)
-               panic("Oh boy, that early out of memory?");
-
-       page = virt_to_page((void *)empty_zero_page);
-       split_page(page, order);
-       for (i = 0; i < (1 << order); i++, page++)
-               mark_page_reserved(page);
-
-       zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
-}
 
 void copy_user_highpage(struct page *to, struct page *from,
        unsigned long vaddr, struct vm_area_struct *vma)
@@ -106,7 +81,6 @@ void __init mem_init(void)
        high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
        memblock_free_all();
-       setup_zero_pages();     /* Setup zeroed pages.  */
 }
 #endif /* !CONFIG_NUMA */
 
@@ -191,43 +165,42 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *al
 #endif
 #endif
 
-static pte_t *fixmap_pte(unsigned long addr)
+pte_t * __init populate_kernel_pte(unsigned long addr)
 {
-       pgd_t *pgd;
-       p4d_t *p4d;
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
        pud_t *pud;
        pmd_t *pmd;
 
-       pgd = pgd_offset_k(addr);
-       p4d = p4d_offset(pgd, addr);
-
-       if (pgd_none(*pgd)) {
-               pud_t *new __maybe_unused;
-
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pgd_populate(&init_mm, pgd, new);
+       if (p4d_none(*p4d)) {
+               pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pud)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               p4d_populate(&init_mm, p4d, pud);
 #ifndef __PAGETABLE_PUD_FOLDED
-               pud_init(new);
+               pud_init(pud);
 #endif
        }
 
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud)) {
-               pmd_t *new __maybe_unused;
-
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pud_populate(&init_mm, pud, new);
+               pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pmd)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               pud_populate(&init_mm, pud, pmd);
 #ifndef __PAGETABLE_PMD_FOLDED
-               pmd_init(new);
+               pmd_init(pmd);
 #endif
        }
 
        pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd)) {
-               pte_t *new __maybe_unused;
+       if (!pmd_present(*pmd)) {
+               pte_t *pte;
 
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pmd_populate_kernel(&init_mm, pmd, new);
+               pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pte)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               pmd_populate_kernel(&init_mm, pmd, pte);
        }
 
        return pte_offset_kernel(pmd, addr);
@@ -241,7 +214,7 @@ void __init __set_fixmap(enum fixed_addresses idx,
 
        BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-       ptep = fixmap_pte(addr);
+       ptep = populate_kernel_pte(addr);
        if (!pte_none(*ptep)) {
                pte_ERROR(*ptep);
                return;
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
new file mode 100644 (file)
index 0000000..da68bc1
--- /dev/null
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm-generic/sections.h>
+
+static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define __p4d_none(early, p4d) (0) /* PUD level folded: never reported as none */
+#else /* !__PAGETABLE_PUD_FOLDED */
+#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \
+(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud))) /* late: entry still refers to the early shadow pud */
+#endif /* __PAGETABLE_PUD_FOLDED */
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define __pud_none(early, pud) (0) /* PMD level folded: never reported as none */
+#else /* !__PAGETABLE_PMD_FOLDED */
+#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \
+(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd))) /* late: entry still refers to the early shadow pmd */
+#endif /* __PAGETABLE_PMD_FOLDED */
+
+#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \
+(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte))) /* late: entry still refers to the early shadow pte table */
+
+#define __pte_none(early, pte) (early ? pte_none(pte) : \
+((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page))) /* late: PFN bits still match the early shadow page */
+
+bool kasan_early_stage = true; /* set to false in kasan_init() after the early shadow has been populated */
+
+/*
+ * Allocate one page-aligned page from memblock on @node for shadow page tables or shadow pages; panic on failure.
+ */
+static phys_addr_t __init kasan_alloc_zeroed_page(int node)
+{
+       void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+                                       __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+       if (!p)
+               panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
+                       __func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS));
+
+       return __pa(p); /* callers expect a physical address and convert back with __va() */
+}
+
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early)
+{
+       if (__pmd_none(early, READ_ONCE(*pmdp))) { /* no usable PTE table under *pmdp yet: install one */
+               phys_addr_t pte_phys = early ? /* early: the static shadow table; late: a newly allocated page */
+                               __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
+               if (!early) /* late: the new table starts as a copy of the early shadow PTEs */
+                       memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
+               pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
+       }
+
+       return pte_offset_kernel(pmdp, addr); /* PTE slot for addr within the table under *pmdp */
+}
+
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early)
+{
+       if (__pud_none(early, READ_ONCE(*pudp))) { /* no usable PMD table under *pudp yet: install one */
+               phys_addr_t pmd_phys = early ? /* early: the static shadow table; late: a newly allocated page */
+                               __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node);
+               if (!early) /* late: the new table starts as a copy of the early shadow PMDs */
+                       memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd));
+               pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys));
+       }
+
+       return pmd_offset(pudp, addr); /* PMD slot for addr within the table under *pudp */
+}
+
+static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early)
+{
+       if (__p4d_none(early, READ_ONCE(*p4dp))) { /* no usable PUD table under *p4dp yet: install one */
+               phys_addr_t pud_phys = early ? /* early: the static shadow table; late: a newly allocated page */
+                       __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node);
+               if (!early) /* late: the new table starts as a copy of the early shadow PUDs */
+                       memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud));
+               p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys));
+       }
+
+       return pud_offset(p4dp, addr); /* PUD slot for addr within the table under *p4dp */
+}
+
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
+                                     unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early); /* first PTE slot, creating the table if needed */
+
+       do { /* one page per iteration; stops at end or at the first PTE that __pte_none() rejects */
+               phys_addr_t page_phys = early ? /* early: every PTE shares the early shadow page; late: one new page each */
+                                       __pa_symbol(kasan_early_shadow_page)
+                                             : kasan_alloc_zeroed_page(node);
+               next = addr + PAGE_SIZE;
+               set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+       } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
+}
+
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
+                                     unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early); /* first PMD slot, creating the table if needed */
+
+       do { /* one PMD-sized step per iteration; stops at end or at the first PMD that __pmd_none() rejects */
+               next = pmd_addr_end(addr, end);
+               kasan_pte_populate(pmdp, addr, next, node, early);
+       } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
+}
+
+static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
+                                           unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early); /* first PUD slot, creating the table if needed */
+
+       do { /* walks every PUD entry up to end; the loop condition has no "none" check at this level */
+               next = pud_addr_end(addr, end);
+               kasan_pmd_populate(pudp, addr, next, node, early);
+       } while (pudp++, addr = next, addr != end);
+}
+
+static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
+                                           unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       p4d_t *p4dp = p4d_offset(pgdp, addr); /* plain lookup: unlike the lower levels, nothing is allocated here */
+
+       do { /* walks every P4D entry up to end */
+               next = p4d_addr_end(addr, end);
+               kasan_pud_populate(p4dp, addr, next, node, early);
+       } while (p4dp++, addr = next, addr != end);
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+                                     int node, bool early)
+{
+       unsigned long next;
+       pgd_t *pgdp;
+
+       pgdp = pgd_offset_k(addr); /* start from the kernel PGD entry for addr */
+
+       do { /* top of the walk: hands each PGD-sized piece of [addr, end) to the P4D level */
+               next = pgd_addr_end(addr, end);
+               kasan_p4d_populate(pgdp, addr, next, node, early);
+       } while (pgdp++, addr = next, addr != end);
+
+}
+
+/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
+static void __init kasan_map_populate(unsigned long start, unsigned long end,
+                                     int node)
+{
+       kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false); /* early=false; range widened to whole pages */
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); /* compile-time checks only; no run-time work here */
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); /* shadow region must start and end on PGDIR_SIZE boundaries */
+}
+
+static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
+{
+       WRITE_ONCE(*pgdp, pgdval); /* store pgdval into the PGD entry with a single WRITE_ONCE() */
+}
+
+static void __init clear_pgds(unsigned long start, unsigned long end)
+{
+       /*
+        * Remove references to kasan page tables from
+        * swapper_pg_dir. pgd_clear() can't be used
+        * here because it's a nop on 2,3-level pagetable setups
+        */
+       for (; start < end; start += PGDIR_SIZE) /* one PGD entry per PGDIR_SIZE step */
+               kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0)); /* write a zero entry directly */
+}
+
+void __init kasan_init(void)
+{
+       u64 i;
+       phys_addr_t pa_start, pa_end;
+
+       /*
+        * pagetable_init() populated the PGD with invalid_pmd_table or
+        * invalid_pud_table (depending on how many page table levels are
+        * in use), so the PGD entries covering the kasan shadow are
+        * non-zero and must be cleared first. Otherwise the pgd_none()
+        * check would be false and the population below would not create
+        * any new pgd entries at all.
+        */
+       memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
+       csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH); /* point PGDH at the private copy made above */
+       local_flush_tlb_all();
+
+       clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+       /* Maps everything to a single page of zeroes */
+       kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);
+
+       kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+                                       kasan_mem_to_shadow((void *)KFENCE_AREA_END));
+
+       kasan_early_stage = false;
+
+       /* Populate the shadow of the linear mapping, one memory range at a time */
+       for_each_mem_range(i, &pa_start, &pa_end) {
+               void *start = (void *)phys_to_virt(pa_start);
+               void *end   = (void *)phys_to_virt(pa_end);
+
+               if (start >= end)
+                       break;
+
+               kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
+                       (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
+       }
+
+       /* Populate the shadow of the modules area (MODULES_VADDR to MODULES_END) */
+       kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
+               (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);
+       /*
+        * KASAN may reuse the contents of kasan_early_shadow_pte directly, so we
+        * should make sure that it maps the zero page read-only.
+        */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               set_pte(&kasan_early_shadow_pte[i],
+                       pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO));
+
+       memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+       csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH); /* point PGDH back at swapper_pg_dir */
+       local_flush_tlb_all();
+
+       /* At this point kasan is fully initialized. Enable error messages */
+       init_task.kasan_depth = 0;
+       pr_info("KernelAddressSanitizer initialized.\n");
+}
index fbe1a48..a9630a8 100644 (file)
@@ -8,12 +8,11 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 
-unsigned long shm_align_mask = PAGE_SIZE - 1;  /* Sane caches */
-EXPORT_SYMBOL(shm_align_mask);
+#define SHM_ALIGN_MASK (SHMLBA - 1)
 
-#define COLOUR_ALIGN(addr, pgoff)                              \
-       ((((addr) + shm_align_mask) & ~shm_align_mask) +        \
-        (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+#define COLOUR_ALIGN(addr, pgoff)                      \
+       ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK)  \
+        + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))
 
 enum mmap_allocation_direction {UP, DOWN};
 
@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
                 * cache aliasing constraints.
                 */
                if ((flags & MAP_SHARED) &&
-                   ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+                   ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK))
                        return -EINVAL;
                return addr;
        }
@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
        }
 
        info.length = len;
-       info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+       info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0;
        info.align_offset = pgoff << PAGE_SHIFT;
 
        if (dir == DOWN) {
index b14343e..71d0539 100644 (file)
@@ -9,6 +9,18 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+struct page *dmw_virt_to_page(unsigned long kaddr)
+{
+       return pfn_to_page(virt_to_pfn(kaddr)); /* kaddr -> pfn via virt_to_pfn(), then pfn -> struct page */
+}
+EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+
+struct page *tlb_virt_to_page(unsigned long kaddr)
+{
+       return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); /* pfn is read from the kernel PTE that virt_to_kpte() returns for kaddr */
+}
+EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
        pgd_t *init, *ret = NULL;
index a50308b..5c97d14 100644 (file)
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 # Objects to go into the VDSO.
 
+KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
 # Include the generic Makefile to check the built vdso.
 include $(srctree)/lib/vdso/Makefile
 
diff --git a/arch/m68k/include/asm/ide.h b/arch/m68k/include/asm/ide.h
deleted file mode 100644 (file)
index 05cc7dc..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/* Copyright(c) 1996 Kars de Jong */
-/* Based on the ide driver from 1.2.13pl8 */
-
-/*
- * Credits (alphabetical):
- *
- *  - Bjoern Brauel
- *  - Kars de Jong
- *  - Torsten Ebeling
- *  - Dwight Engen
- *  - Thorsten Floeck
- *  - Roman Hodek
- *  - Guenther Kelleter
- *  - Chris Lawrence
- *  - Michael Rausch
- *  - Christian Sauer
- *  - Michael Schmitz
- *  - Jes Soerensen
- *  - Michael Thurm
- *  - Geert Uytterhoeven
- */
-
-#ifndef _M68K_IDE_H
-#define _M68K_IDE_H
-
-#ifdef __KERNEL__
-#include <asm/setup.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-
-#ifdef CONFIG_MMU
-
-/*
- * Get rid of defs from io.h - ide has its private and conflicting versions
- * Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we
- * always use the `raw' MMIO versions
- */
-#undef readb
-#undef readw
-#undef writeb
-#undef writew
-
-#define readb                          in_8
-#define readw                          in_be16
-#define __ide_mm_insw(port, addr, n)   raw_insw((u16 *)port, addr, n)
-#define __ide_mm_insl(port, addr, n)   raw_insl((u32 *)port, addr, n)
-#define writeb(val, port)              out_8(port, val)
-#define writew(val, port)              out_be16(port, val)
-#define __ide_mm_outsw(port, addr, n)  raw_outsw((u16 *)port, addr, n)
-#define __ide_mm_outsl(port, addr, n)  raw_outsl((u32 *)port, addr, n)
-
-#else
-
-#define __ide_mm_insw(port, addr, n)   io_insw((unsigned int)port, addr, n)
-#define __ide_mm_insl(port, addr, n)   io_insl((unsigned int)port, addr, n)
-#define __ide_mm_outsw(port, addr, n)  io_outsw((unsigned int)port, addr, n)
-#define __ide_mm_outsl(port, addr, n)  io_outsl((unsigned int)port, addr, n)
-
-#endif /* CONFIG_MMU */
-
-#endif /* __KERNEL__ */
-#endif /* _M68K_IDE_H */
index 337f23e..86a4ce0 100644 (file)
@@ -99,9 +99,6 @@ extern int page_is_ram(unsigned long pfn);
 # define phys_to_pfn(phys)     (PFN_DOWN(phys))
 # define pfn_to_phys(pfn)      (PFN_PHYS(pfn))
 
-# define virt_to_pfn(vaddr)    (phys_to_pfn((__pa(vaddr))))
-# define pfn_to_virt(pfn)      __va(pfn_to_phys((pfn)))
-
 #  define virt_to_page(kaddr)  (pfn_to_page(__pa(kaddr) >> PAGE_SHIFT))
 #  define page_to_virt(page)   __va(page_to_pfn(page) << PAGE_SHIFT)
 #  define page_to_phys(page)     (page_to_pfn(page) << PAGE_SHIFT)
@@ -109,11 +106,6 @@ extern int page_is_ram(unsigned long pfn);
 #  define ARCH_PFN_OFFSET      (memory_start >> PAGE_SHIFT)
 # endif /* __ASSEMBLY__ */
 
-#define        virt_addr_valid(vaddr)  (pfn_valid(virt_to_pfn(vaddr)))
-
-# define __pa(x)       __virt_to_phys((unsigned long)(x))
-# define __va(x)       ((void *)__phys_to_virt((unsigned long)(x)))
-
 /* Convert between virtual and physical address for MMU. */
 /* Handle MicroBlaze processor with virtual memory. */
 #define __virt_to_phys(addr) \
@@ -125,6 +117,25 @@ extern int page_is_ram(unsigned long pfn);
 #define tovirt(rd, rs) \
        addik rd, rs, (CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR)
 
+#ifndef __ASSEMBLY__
+
+# define __pa(x)       __virt_to_phys((unsigned long)(x))
+# define __va(x)       ((void *)__phys_to_virt((unsigned long)(x)))
+
+static inline unsigned long virt_to_pfn(const void *vaddr)
+{
+       return phys_to_pfn(__pa(vaddr)); /* virtual -> physical via __pa(), then physical -> page frame number */
+}
+
+static inline const void *pfn_to_virt(unsigned long pfn)
+{
+       return __va(pfn_to_phys((pfn))); /* page frame number -> physical, then physical -> virtual via __va() */
+}
+
+#define        virt_addr_valid(vaddr)  (pfn_valid(virt_to_pfn(vaddr)))
+
+#endif /* __ASSEMBLY__ */
+
 #define TOPHYS(addr)  __virt_to_phys(addr)
 
 #endif /* __KERNEL__ */
index 3657f5e..bf2600f 100644 (file)
@@ -25,7 +25,5 @@ void machine_shutdown(void);
 void machine_halt(void);
 void machine_power_off(void);
 
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
-
 # endif /* __ASSEMBLY__ */
 #endif /* _ASM_MICROBLAZE_SETUP_H */
index 5f47229..2f66c79 100644 (file)
@@ -9,7 +9,6 @@
 
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/of_platform.h>
 #include <linux/reboot.h>
 
 void machine_shutdown(void)
index 353fabd..3827dc7 100644 (file)
@@ -270,22 +270,6 @@ asmlinkage void __init mmu_init(void)
        memblock_dump_all();
 }
 
-void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
-{
-       void *p;
-
-       if (mem_init_done) {
-               p = kzalloc(size, mask);
-       } else {
-               p = memblock_alloc(size, SMP_CACHE_BYTES);
-               if (!p)
-                       panic("%s: Failed to allocate %zu bytes\n",
-                             __func__, size);
-       }
-
-       return p;
-}
-
 static const pgprot_t protection_map[16] = {
        [VM_NONE]                                       = PAGE_NONE,
        [VM_READ]                                       = PAGE_READONLY_X,
index a47593d..f49807e 100644 (file)
@@ -181,12 +181,16 @@ endif
 cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
 cflags-$(CONFIG_CPU_BMIPS)     += -march=mips32 -Wa,-mips32 -Wa,--trap
 
-cflags-$(CONFIG_CPU_LOONGSON2E) += $(call cc-option,-march=loongson2e) -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON2F) += $(call cc-option,-march=loongson2f) -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-march=loongson3a,-march=mips64r2) -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2E) += -march=loongson2e -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2F) += -march=loongson2f -Wa,--trap
 # Some -march= flags enable MMI instructions, and GCC complains about that
 # support being enabled alongside -msoft-float. Thus explicitly disable MMI.
 cflags-$(CONFIG_CPU_LOONGSON2EF) += $(call cc-option,-mno-loongson-mmi)
+ifdef CONFIG_CPU_LOONGSON64
+cflags-$(CONFIG_CPU_LOONGSON64)        += -Wa,--trap
+cflags-$(CONFIG_CC_IS_GCC) += -march=loongson3a
+cflags-$(CONFIG_CC_IS_CLANG) += -march=mips64r2
+endif
 cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-mno-loongson-mmi)
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,)
@@ -299,8 +303,8 @@ ifdef CONFIG_64BIT
     endif
   endif
 
-  ifeq ($(KBUILD_SYM32)$(call cc-option-yn,-msym32), yy)
-    cflags-y += -msym32 -DKBUILD_64BIT_SYM32
+  ifeq ($(KBUILD_SYM32)y)
+    cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
   else
     ifeq ($(CONFIG_CPU_DADDI_WORKAROUNDS), y)
       $(error CONFIG_CPU_DADDI_WORKAROUNDS unsupported without -msym32)
@@ -341,7 +345,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 KBUILD_LDFLAGS         += -m $(ld-emul)
 
-ifdef CONFIG_MIPS
+ifdef need-compiler
 CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
        grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
        sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
index 053805c..ec180ab 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/of.h>
 #include <linux/of_clk.h>
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 #include <linux/libfdt.h>
 #include <linux/smp.h>
 #include <asm/addrspace.h>
index c8a8c6d..3395acd 100644 (file)
@@ -12,7 +12,8 @@
 #include <linux/semaphore.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/mtd/partitions.h>
 
 #include <asm/octeon/octeon.h>
index 25860fb..fef0c6d 100644 (file)
@@ -13,9 +13,9 @@
  * Mnemonic names for arguments to memcpy/__copy_user
  */
 
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define dst a0
index 235c77c..f76783c 100644 (file)
@@ -8,8 +8,10 @@
  */
 
 #include <linux/etherdevice.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
+#include <linux/platform_device.h>
 #include <linux/libfdt.h>
 
 #include <asm/octeon/octeon.h>
index 44821f4..dc49b09 100644 (file)
@@ -127,7 +127,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index 2b41331..07839a4 100644 (file)
@@ -283,6 +283,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
 CONFIG_DRM_AMD_ACP=y
 CONFIG_DRM_AMD_DC=y
 CONFIG_DRM_AMD_DC_SI=y
+CONFIG_DRM_AST=m
 CONFIG_DRM_RADEON=m
 CONFIG_DRM_QXL=y
 CONFIG_DRM_VIRTIO_GPU=y
index 7432090..ae1a779 100644 (file)
@@ -127,7 +127,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index dd2b9c1..c07e30f 100644 (file)
@@ -131,7 +131,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index 97c2d7f..0a57010 100644 (file)
@@ -128,7 +128,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index e0e312d..5c5e218 100644 (file)
@@ -90,7 +90,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index dee1727..7ba67a0 100644 (file)
@@ -7,7 +7,6 @@ generated-y += unistd_nr_n32.h
 generated-y += unistd_nr_n64.h
 generated-y += unistd_nr_o32.h
 
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
index 04cedf9..54a85f1 100644 (file)
@@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-int kvm_arch_flush_remote_tlb(struct kvm *kvm);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
 
 #endif /* __MIPS_KVM_HOST_H__ */
index 7971272..84f4546 100644 (file)
@@ -45,8 +45,6 @@
 #define LS1X_NAND_BASE                 0x1fe78000
 #define LS1X_CLK_BASE                  0x1fe78030
 
-#include <regs-clk.h>
 #include <regs-mux.h>
-#include <regs-rtc.h>
 
 #endif /* __ASM_MACH_LOONGSON32_LOONGSON1_H */
diff --git a/arch/mips/include/asm/mach-loongson32/regs-clk.h b/arch/mips/include/asm/mach-loongson32/regs-clk.h
deleted file mode 100644 (file)
index 98136fa..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
- *
- * Loongson 1 Clock Register Definitions.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_REGS_CLK_H
-#define __ASM_MACH_LOONGSON32_REGS_CLK_H
-
-#define LS1X_CLK_REG(x) \
-               ((void __iomem *)KSEG1ADDR(LS1X_CLK_BASE + (x)))
-
-#define LS1X_CLK_PLL_FREQ              LS1X_CLK_REG(0x0)
-#define LS1X_CLK_PLL_DIV               LS1X_CLK_REG(0x4)
-
-#if defined(CONFIG_LOONGSON1_LS1B)
-/* Clock PLL Divisor Register Bits */
-#define DIV_DC_EN                      BIT(31)
-#define DIV_DC_RST                     BIT(30)
-#define DIV_CPU_EN                     BIT(25)
-#define DIV_CPU_RST                    BIT(24)
-#define DIV_DDR_EN                     BIT(19)
-#define DIV_DDR_RST                    BIT(18)
-#define RST_DC_EN                      BIT(5)
-#define RST_DC                         BIT(4)
-#define RST_DDR_EN                     BIT(3)
-#define RST_DDR                                BIT(2)
-#define RST_CPU_EN                     BIT(1)
-#define RST_CPU                                BIT(0)
-
-#define DIV_DC_SHIFT                   26
-#define DIV_CPU_SHIFT                  20
-#define DIV_DDR_SHIFT                  14
-
-#define DIV_DC_WIDTH                   4
-#define DIV_CPU_WIDTH                  4
-#define DIV_DDR_WIDTH                  4
-
-#define BYPASS_DC_SHIFT                        12
-#define BYPASS_DDR_SHIFT               10
-#define BYPASS_CPU_SHIFT               8
-
-#define BYPASS_DC_WIDTH                        1
-#define BYPASS_DDR_WIDTH               1
-#define BYPASS_CPU_WIDTH               1
-
-#elif defined(CONFIG_LOONGSON1_LS1C)
-/* PLL/SDRAM Frequency configuration register Bits */
-#define PLL_VALID                      BIT(31)
-#define FRAC_N                         GENMASK(23, 16)
-#define RST_TIME                       GENMASK(3, 2)
-#define SDRAM_DIV                      GENMASK(1, 0)
-
-/* CPU/CAMERA/DC Frequency configuration register Bits */
-#define DIV_DC_EN                      BIT(31)
-#define DIV_DC                         GENMASK(30, 24)
-#define DIV_CAM_EN                     BIT(23)
-#define DIV_CAM                                GENMASK(22, 16)
-#define DIV_CPU_EN                     BIT(15)
-#define DIV_CPU                                GENMASK(14, 8)
-#define DIV_DC_SEL_EN                  BIT(5)
-#define DIV_DC_SEL                     BIT(4)
-#define DIV_CAM_SEL_EN                 BIT(3)
-#define DIV_CAM_SEL                    BIT(2)
-#define DIV_CPU_SEL_EN                 BIT(1)
-#define DIV_CPU_SEL                    BIT(0)
-
-#define DIV_DC_SHIFT                   24
-#define DIV_CAM_SHIFT                  16
-#define DIV_CPU_SHIFT                  8
-#define DIV_DDR_SHIFT                  0
-
-#define DIV_DC_WIDTH                   7
-#define DIV_CAM_WIDTH                  7
-#define DIV_CPU_WIDTH                  7
-#define DIV_DDR_WIDTH                  2
-
-#endif
-
-#endif /* __ASM_MACH_LOONGSON32_REGS_CLK_H */
diff --git a/arch/mips/include/asm/mach-loongson32/regs-rtc.h b/arch/mips/include/asm/mach-loongson32/regs-rtc.h
deleted file mode 100644 (file)
index a3d096b..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2016 Yang Ling <gnaygnil@gmail.com>
- *
- * Loongson 1 RTC timer Register Definitions.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_REGS_RTC_H
-#define __ASM_MACH_LOONGSON32_REGS_RTC_H
-
-#define LS1X_RTC_REG(x) \
-               ((void __iomem *)KSEG1ADDR(LS1X_RTC_BASE + (x)))
-
-#define LS1X_RTC_CTRL  LS1X_RTC_REG(0x40)
-
-#define RTC_EXTCLK_OK  (BIT(5) | BIT(8))
-#define RTC_EXTCLK_EN  BIT(8)
-
-#endif /* __ASM_MACH_LOONGSON32_REGS_RTC_H */
index cff52b2..fcec579 100644 (file)
@@ -10,7 +10,7 @@
  * Author: Wu Zhangjin <wuzhangjin@gmail.com>
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
 #include <asm/ftrace.h>
index 9b7c8ab..447a3ea 100644 (file)
@@ -11,7 +11,6 @@
  *    written by Carsten Langgaard, carstenl@mips.com
  */
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/asm-offsets.h>
 #include <asm/mipsregs.h>
 #include <asm/regdef.h>
index 6c745aa..c000b22 100644 (file)
  * Further modifications to make this work:
  * Copyright (c) 1998 Harald Koerfgen
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
index 71b1aaf..48e6394 100644 (file)
@@ -13,7 +13,6 @@
  */
 #include <asm/asm.h>
 #include <asm/cachectl.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
index 4e8c985..4bb97ee 100644 (file)
  * Copyright (C) 2000 MIPS Technologies, Inc.
  * Copyright (C) 1999, 2001 Silicon Graphics, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
index aa5583a..231ac05 100644 (file)
@@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
        /* Flush slot from GPA */
        kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
                              slot->base_gfn + slot->npages - 1);
-       kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+       kvm_flush_remote_tlbs_memslot(kvm, slot);
        spin_unlock(&kvm->mmu_lock);
 }
 
@@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
                                        new->base_gfn + new->npages - 1);
                if (needs_flush)
-                       kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+                       kvm_flush_remote_tlbs_memslot(kvm, new);
                spin_unlock(&kvm->mmu_lock);
        }
 }
@@ -981,18 +981,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        kvm_mips_callbacks->prepare_flush_shadow(kvm);
        return 1;
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
        int r;
index e8c0898..7b2ac13 100644 (file)
@@ -447,7 +447,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
        gpa_t gpa = range->start << PAGE_SHIFT;
-       pte_t hva_pte = range->pte;
+       pte_t hva_pte = range->arg.pte;
        pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
        pte_t old_pte;
 
index 20622bf..8f20800 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/sched.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 
index 9619996..4a808f8 100644 (file)
@@ -6,7 +6,8 @@
  */
 
 #include <linux/ioport.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 
 #include <lantiq_soc.h>
 
index a492b1e..8d52001 100644 (file)
@@ -8,8 +8,9 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 
 #include <lantiq_soc.h>
 #include "../clk.h"
index d444a1b..3ed0782 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/clkdev.h>
 #include <linux/spinlock.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/of_address.h>
 
 #include <lantiq_soc.h>
index 2796e87..37c1330 100644 (file)
@@ -7,7 +7,8 @@
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/gpio/consumer.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <lantiq_soc.h>
index 7767137..3d2ff41 100644 (file)
@@ -11,9 +11,9 @@
  * Copyright (C) 2014 Imagination Technologies Ltd.
  */
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #ifdef CONFIG_64BIT
index 18a43f2..a4b4e80 100644 (file)
@@ -32,9 +32,9 @@
 #undef CONFIG_CPU_HAS_PREFETCH
 #endif
 
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define dst a0
index 0b342ba..79405c3 100644 (file)
@@ -8,9 +8,9 @@
  * Copyright (C) 2007 by Maciej W. Rozycki
  * Copyright (C) 2011, 2012 MIPS Technologies, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #if LONGSIZE == 4
index 13aaa99..94f4203 100644 (file)
@@ -7,9 +7,9 @@
  * Copyright (C) 2011 MIPS Technologies, Inc.
  */
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define EX(insn,reg,addr,handler)                      \
index 6de31b6..c192a6f 100644 (file)
@@ -6,9 +6,9 @@
  * Copyright (c) 1996, 1998, 1999, 2004 by Ralf Baechle
  * Copyright (c) 1999 Silicon Graphics, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define EX(insn,reg,addr,handler)                      \
index 64d7979..8075590 100644 (file)
@@ -265,14 +265,6 @@ struct platform_device ls1x_ehci_pdev = {
 };
 
 /* Real Time Clock */
-void __init ls1x_rtc_set_extclk(struct platform_device *pdev)
-{
-       u32 val = __raw_readl(LS1X_RTC_CTRL);
-
-       if (!(val & RTC_EXTCLK_OK))
-               __raw_writel(val | RTC_EXTCLK_EN, LS1X_RTC_CTRL);
-}
-
 struct platform_device ls1x_rtc_pdev = {
        .name           = "ls1x-rtc",
        .id             = -1,
index cdecd7a..e015a26 100644 (file)
@@ -187,181 +187,181 @@ static void csr_ipi_probe(void)
 
 static void ipi_set0_regs_init(void)
 {
-       ipi_set0_regs[0] = (void *)
+       ipi_set0_regs[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + SET0);
-       ipi_set0_regs[1] = (void *)
+       ipi_set0_regs[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + SET0);
-       ipi_set0_regs[2] = (void *)
+       ipi_set0_regs[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + SET0);
-       ipi_set0_regs[3] = (void *)
+       ipi_set0_regs[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + SET0);
-       ipi_set0_regs[4] = (void *)
+       ipi_set0_regs[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + SET0);
-       ipi_set0_regs[5] = (void *)
+       ipi_set0_regs[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + SET0);
-       ipi_set0_regs[6] = (void *)
+       ipi_set0_regs[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + SET0);
-       ipi_set0_regs[7] = (void *)
+       ipi_set0_regs[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + SET0);
-       ipi_set0_regs[8] = (void *)
+       ipi_set0_regs[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + SET0);
-       ipi_set0_regs[9] = (void *)
+       ipi_set0_regs[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + SET0);
-       ipi_set0_regs[10] = (void *)
+       ipi_set0_regs[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + SET0);
-       ipi_set0_regs[11] = (void *)
+       ipi_set0_regs[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + SET0);
-       ipi_set0_regs[12] = (void *)
+       ipi_set0_regs[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + SET0);
-       ipi_set0_regs[13] = (void *)
+       ipi_set0_regs[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + SET0);
-       ipi_set0_regs[14] = (void *)
+       ipi_set0_regs[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + SET0);
-       ipi_set0_regs[15] = (void *)
+       ipi_set0_regs[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + SET0);
 }
 
 static void ipi_clear0_regs_init(void)
 {
-       ipi_clear0_regs[0] = (void *)
+       ipi_clear0_regs[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + CLEAR0);
-       ipi_clear0_regs[1] = (void *)
+       ipi_clear0_regs[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + CLEAR0);
-       ipi_clear0_regs[2] = (void *)
+       ipi_clear0_regs[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + CLEAR0);
-       ipi_clear0_regs[3] = (void *)
+       ipi_clear0_regs[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + CLEAR0);
-       ipi_clear0_regs[4] = (void *)
+       ipi_clear0_regs[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + CLEAR0);
-       ipi_clear0_regs[5] = (void *)
+       ipi_clear0_regs[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + CLEAR0);
-       ipi_clear0_regs[6] = (void *)
+       ipi_clear0_regs[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + CLEAR0);
-       ipi_clear0_regs[7] = (void *)
+       ipi_clear0_regs[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + CLEAR0);
-       ipi_clear0_regs[8] = (void *)
+       ipi_clear0_regs[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + CLEAR0);
-       ipi_clear0_regs[9] = (void *)
+       ipi_clear0_regs[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + CLEAR0);
-       ipi_clear0_regs[10] = (void *)
+       ipi_clear0_regs[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + CLEAR0);
-       ipi_clear0_regs[11] = (void *)
+       ipi_clear0_regs[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + CLEAR0);
-       ipi_clear0_regs[12] = (void *)
+       ipi_clear0_regs[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + CLEAR0);
-       ipi_clear0_regs[13] = (void *)
+       ipi_clear0_regs[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + CLEAR0);
-       ipi_clear0_regs[14] = (void *)
+       ipi_clear0_regs[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + CLEAR0);
-       ipi_clear0_regs[15] = (void *)
+       ipi_clear0_regs[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + CLEAR0);
 }
 
 static void ipi_status0_regs_init(void)
 {
-       ipi_status0_regs[0] = (void *)
+       ipi_status0_regs[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + STATUS0);
-       ipi_status0_regs[1] = (void *)
+       ipi_status0_regs[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + STATUS0);
-       ipi_status0_regs[2] = (void *)
+       ipi_status0_regs[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + STATUS0);
-       ipi_status0_regs[3] = (void *)
+       ipi_status0_regs[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + STATUS0);
-       ipi_status0_regs[4] = (void *)
+       ipi_status0_regs[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + STATUS0);
-       ipi_status0_regs[5] = (void *)
+       ipi_status0_regs[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + STATUS0);
-       ipi_status0_regs[6] = (void *)
+       ipi_status0_regs[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + STATUS0);
-       ipi_status0_regs[7] = (void *)
+       ipi_status0_regs[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + STATUS0);
-       ipi_status0_regs[8] = (void *)
+       ipi_status0_regs[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + STATUS0);
-       ipi_status0_regs[9] = (void *)
+       ipi_status0_regs[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + STATUS0);
-       ipi_status0_regs[10] = (void *)
+       ipi_status0_regs[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + STATUS0);
-       ipi_status0_regs[11] = (void *)
+       ipi_status0_regs[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + STATUS0);
-       ipi_status0_regs[12] = (void *)
+       ipi_status0_regs[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + STATUS0);
-       ipi_status0_regs[13] = (void *)
+       ipi_status0_regs[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + STATUS0);
-       ipi_status0_regs[14] = (void *)
+       ipi_status0_regs[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + STATUS0);
-       ipi_status0_regs[15] = (void *)
+       ipi_status0_regs[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + STATUS0);
 }
 
 static void ipi_en0_regs_init(void)
 {
-       ipi_en0_regs[0] = (void *)
+       ipi_en0_regs[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + EN0);
-       ipi_en0_regs[1] = (void *)
+       ipi_en0_regs[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + EN0);
-       ipi_en0_regs[2] = (void *)
+       ipi_en0_regs[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + EN0);
-       ipi_en0_regs[3] = (void *)
+       ipi_en0_regs[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + EN0);
-       ipi_en0_regs[4] = (void *)
+       ipi_en0_regs[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + EN0);
-       ipi_en0_regs[5] = (void *)
+       ipi_en0_regs[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + EN0);
-       ipi_en0_regs[6] = (void *)
+       ipi_en0_regs[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + EN0);
-       ipi_en0_regs[7] = (void *)
+       ipi_en0_regs[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + EN0);
-       ipi_en0_regs[8] = (void *)
+       ipi_en0_regs[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + EN0);
-       ipi_en0_regs[9] = (void *)
+       ipi_en0_regs[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + EN0);
-       ipi_en0_regs[10] = (void *)
+       ipi_en0_regs[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + EN0);
-       ipi_en0_regs[11] = (void *)
+       ipi_en0_regs[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + EN0);
-       ipi_en0_regs[12] = (void *)
+       ipi_en0_regs[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + EN0);
-       ipi_en0_regs[13] = (void *)
+       ipi_en0_regs[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + EN0);
-       ipi_en0_regs[14] = (void *)
+       ipi_en0_regs[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + EN0);
-       ipi_en0_regs[15] = (void *)
+       ipi_en0_regs[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + EN0);
 }
 
 static void ipi_mailbox_buf_init(void)
 {
-       ipi_mailbox_buf[0] = (void *)
+       ipi_mailbox_buf[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + BUF);
-       ipi_mailbox_buf[1] = (void *)
+       ipi_mailbox_buf[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + BUF);
-       ipi_mailbox_buf[2] = (void *)
+       ipi_mailbox_buf[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + BUF);
-       ipi_mailbox_buf[3] = (void *)
+       ipi_mailbox_buf[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + BUF);
-       ipi_mailbox_buf[4] = (void *)
+       ipi_mailbox_buf[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + BUF);
-       ipi_mailbox_buf[5] = (void *)
+       ipi_mailbox_buf[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + BUF);
-       ipi_mailbox_buf[6] = (void *)
+       ipi_mailbox_buf[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + BUF);
-       ipi_mailbox_buf[7] = (void *)
+       ipi_mailbox_buf[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + BUF);
-       ipi_mailbox_buf[8] = (void *)
+       ipi_mailbox_buf[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + BUF);
-       ipi_mailbox_buf[9] = (void *)
+       ipi_mailbox_buf[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + BUF);
-       ipi_mailbox_buf[10] = (void *)
+       ipi_mailbox_buf[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + BUF);
-       ipi_mailbox_buf[11] = (void *)
+       ipi_mailbox_buf[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + BUF);
-       ipi_mailbox_buf[12] = (void *)
+       ipi_mailbox_buf[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + BUF);
-       ipi_mailbox_buf[13] = (void *)
+       ipi_mailbox_buf[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + BUF);
-       ipi_mailbox_buf[14] = (void *)
+       ipi_mailbox_buf[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + BUF);
-       ipi_mailbox_buf[15] = (void *)
+       ipi_mailbox_buf[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + BUF);
 }
 
index 43181ac..42d0516 100644 (file)
@@ -8,8 +8,8 @@
  * Copyright (C) 2012  MIPS Technologies, Inc.
  * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
  */
+#include <linux/export.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
index 00fef57..2705d7d 100644 (file)
@@ -11,8 +11,8 @@
  * Copyright (C) 2012  MIPS Technologies, Inc.
  * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
  */
+#include <linux/export.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define FASTPATH_SIZE  128
index 79e29bf..80f7293 100644 (file)
@@ -13,9 +13,9 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/clk.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/of_pci.h>
+#include <linux/platform_device.h>
 
 #include <asm/addrspace.h>
 
index e9dd014..1cada09 100644 (file)
@@ -13,9 +13,8 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 #include <linux/init.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
-#include <linux/of_pci.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach-ralink/rt288x.h>
 
index f695320..73be568 100644 (file)
@@ -5,7 +5,7 @@
  */
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/of_platform.h>
+#include <linux/spinlock.h>
 
 #include <asm/mach-pic32/pic32.h>
 
index f395ae2..25341b2 100644 (file)
@@ -5,8 +5,10 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach-ralink/ralink_regs.h>
 
index fa353bc..46aef0a 100644 (file)
@@ -7,7 +7,7 @@
 
 #include <linux/io.h>
 #include <linux/bitops.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/irqdomain.h>
index 45d60c0..7f90068 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/of_fdt.h>
 #include <linux/kernel.h>
 #include <linux/memblock.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 
 #include <asm/reboot.h>
index aaac1e6..c3b9686 100644 (file)
@@ -7,8 +7,6 @@
  */
 
 #include <linux/string.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 
 #include <asm/bootinfo.h>
 #include <asm/addrspace.h>
index e988455..5ae30b7 100644 (file)
@@ -51,6 +51,7 @@ int __init txx9_pci66_check(struct pci_controller *hose, int top_bus,
        unsigned short vid;
        int cap66 = -1;
        u16 stat;
+       int ret;
 
        /* It seems SLC90E66 needs some time after PCI reset... */
        mdelay(80);
@@ -60,9 +61,9 @@ int __init txx9_pci66_check(struct pci_controller *hose, int top_bus,
        for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) {
                if (PCI_FUNC(pci_devfn))
                        continue;
-               if (early_read_config_word(hose, top_bus, current_bus,
-                                          pci_devfn, PCI_VENDOR_ID, &vid) !=
-                   PCIBIOS_SUCCESSFUL)
+               ret = early_read_config_word(hose, top_bus, current_bus,
+                                            pci_devfn, PCI_VENDOR_ID, &vid);
+               if (ret != PCIBIOS_SUCCESSFUL)
                        continue;
                if (vid == 0xffff)
                        continue;
@@ -343,26 +344,28 @@ static void tc35815_fixup(struct pci_dev *dev)
 
 static void final_fixup(struct pci_dev *dev)
 {
+       unsigned long timeout;
        unsigned char bist;
+       int ret;
 
        /* Do build-in self test */
-       if (pci_read_config_byte(dev, PCI_BIST, &bist) == PCIBIOS_SUCCESSFUL &&
-           (bist & PCI_BIST_CAPABLE)) {
-               unsigned long timeout;
-               pci_set_power_state(dev, PCI_D0);
-               pr_info("PCI: %s BIST...", pci_name(dev));
-               pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START);
-               timeout = jiffies + HZ * 2;     /* timeout after 2 sec */
-               do {
-                       pci_read_config_byte(dev, PCI_BIST, &bist);
-                       if (time_after(jiffies, timeout))
-                               break;
-               } while (bist & PCI_BIST_START);
-               if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START))
-                       pr_cont("failed. (0x%x)\n", bist);
-               else
-                       pr_cont("OK.\n");
-       }
+       ret = pci_read_config_byte(dev, PCI_BIST, &bist);
+       if ((ret != PCIBIOS_SUCCESSFUL) || !(bist & PCI_BIST_CAPABLE))
+               return;
+
+       pci_set_power_state(dev, PCI_D0);
+       pr_info("PCI: %s BIST...", pci_name(dev));
+       pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START);
+       timeout = jiffies + HZ * 2;     /* timeout after 2 sec */
+       do {
+               pci_read_config_byte(dev, PCI_BIST, &bist);
+               if (time_after(jiffies, timeout))
+                       break;
+       } while (bist & PCI_BIST_START);
+       if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START))
+               pr_cont("failed. (0x%x)\n", bist);
+       else
+               pr_cont("OK.\n");
 }
 
 #ifdef CONFIG_TOSHIBA_FPCIB0
index d90b657..836465e 100644 (file)
@@ -94,7 +94,9 @@ VERSION
 #ifndef CONFIG_MIPS_DISABLE_VDSO
        global:
                __vdso_clock_gettime;
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
                __vdso_gettimeofday;
+#endif
                __vdso_clock_getres;
 #if _MIPS_SIM != _MIPS_SIM_ABI64
                __vdso_clock_gettime64;
diff --git a/arch/openrisc/include/asm/bug.h b/arch/openrisc/include/asm/bug.h
new file mode 100644 (file)
index 0000000..6d04776
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_OPENRISC_BUG_H
+#define __ASM_OPENRISC_BUG_H
+
+#include <asm-generic/bug.h>
+
+struct pt_regs;
+
+void __noreturn die(const char *str, struct pt_regs *regs, long err);
+
+#endif /* __ASM_OPENRISC_BUG_H */
index 52b0d7e..44fc1fd 100644 (file)
@@ -72,8 +72,15 @@ typedef struct page *pgtable_t;
 #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
 #define __pa(x) ((unsigned long) (x) - PAGE_OFFSET)
 
-#define virt_to_pfn(kaddr)      (__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn)        __va((pfn) << PAGE_SHIFT)
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+       return __pa(kaddr) >> PAGE_SHIFT;
+}
+
+static inline void *pfn_to_virt(unsigned long pfn)
+{
+       return __va(pfn << PAGE_SHIFT); /* shift to a physical address first, then add PAGE_OFFSET */
+}
 
 #define virt_to_page(addr) \
        (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
index ed9efb4..3b736e7 100644 (file)
@@ -73,6 +73,7 @@ struct thread_struct {
 
 void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 unsigned long __get_wchan(struct task_struct *p);
+void show_registers(struct pt_regs *regs);
 
 #define cpu_relax()     barrier()
 
index dfa558f..86e0292 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 #define __KERNEL_SYSCALLS__
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
@@ -38,6 +39,7 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/spr_defs.h>
+#include <asm/switch_to.h>
 
 #include <linux/smp.h>
 
@@ -119,8 +121,6 @@ void flush_thread(void)
 
 void show_regs(struct pt_regs *regs)
 {
-       extern void show_registers(struct pt_regs *regs);
-
        show_regs_print_info(KERN_DEFAULT);
        /* __PHX__ cleanup this mess */
        show_registers(regs);
index 0b7d2ca..1eeac3b 100644 (file)
 #include <asm/thread_info.h>
 #include <asm/page.h>
 
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
+
 /*
  * Copy the thread state to a regset that can be interpreted by userspace.
  *
index 2e7257a..e2f21a5 100644 (file)
@@ -34,6 +34,11 @@ struct rt_sigframe {
        unsigned char retcode[16];      /* trampoline code */
 };
 
+asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs);
+
+asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+                              int syscall);
+
 static int restore_sigcontext(struct pt_regs *regs,
                              struct sigcontext __user *sc)
 {
@@ -224,7 +229,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
  * mode below.
  */
 
-int do_signal(struct pt_regs *regs, int syscall)
+static int do_signal(struct pt_regs *regs, int syscall)
 {
        struct ksignal ksig;
        unsigned long continue_addr = 0;
index 0a7a059..1c5a2d7 100644 (file)
@@ -23,6 +23,8 @@
 #include <asm/cacheflush.h>
 #include <asm/time.h>
 
+asmlinkage __init void secondary_start_kernel(void);
+
 static void (*smp_cross_call)(const struct cpumask *, unsigned int);
 
 unsigned long secondary_release = -1;
index 8e26c1a..764c7bf 100644 (file)
@@ -25,6 +25,8 @@
 #include <asm/cpuinfo.h>
 #include <asm/time.h>
 
+irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs);
+
 /* Test the timer ticks to count, used in sync routine */
 inline void openrisc_timer_set(unsigned long count)
 {
index 0aa6b07..9370888 100644 (file)
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 
+#include <asm/bug.h>
 #include <asm/io.h>
+#include <asm/processor.h>
 #include <asm/unwinder.h>
 #include <asm/sections.h>
 
-static int kstack_depth_to_print = 0x180;
 int lwa_flag;
 static unsigned long __user *lwa_addr;
 
+asmlinkage void unhandled_exception(struct pt_regs *regs, int ea, int vector);
+asmlinkage void do_trap(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_fpe_trap(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_illegal_instruction(struct pt_regs *regs,
+                                      unsigned long address);
+
 static void print_trace(void *data, unsigned long addr, int reliable)
 {
        const char *loglvl = data;
@@ -143,80 +152,6 @@ bad:
        printk("\n");
 }
 
-void nommu_dump_state(struct pt_regs *regs,
-                     unsigned long ea, unsigned long vector)
-{
-       int i;
-       unsigned long addr, stack = regs->sp;
-
-       printk("\n\r[nommu_dump_state] :: ea %lx, vector %lx\n\r", ea, vector);
-
-       printk("CPU #: %d\n"
-              "   PC: %08lx    SR: %08lx    SP: %08lx\n",
-              0, regs->pc, regs->sr, regs->sp);
-       printk("GPR00: %08lx GPR01: %08lx GPR02: %08lx GPR03: %08lx\n",
-              0L, regs->gpr[1], regs->gpr[2], regs->gpr[3]);
-       printk("GPR04: %08lx GPR05: %08lx GPR06: %08lx GPR07: %08lx\n",
-              regs->gpr[4], regs->gpr[5], regs->gpr[6], regs->gpr[7]);
-       printk("GPR08: %08lx GPR09: %08lx GPR10: %08lx GPR11: %08lx\n",
-              regs->gpr[8], regs->gpr[9], regs->gpr[10], regs->gpr[11]);
-       printk("GPR12: %08lx GPR13: %08lx GPR14: %08lx GPR15: %08lx\n",
-              regs->gpr[12], regs->gpr[13], regs->gpr[14], regs->gpr[15]);
-       printk("GPR16: %08lx GPR17: %08lx GPR18: %08lx GPR19: %08lx\n",
-              regs->gpr[16], regs->gpr[17], regs->gpr[18], regs->gpr[19]);
-       printk("GPR20: %08lx GPR21: %08lx GPR22: %08lx GPR23: %08lx\n",
-              regs->gpr[20], regs->gpr[21], regs->gpr[22], regs->gpr[23]);
-       printk("GPR24: %08lx GPR25: %08lx GPR26: %08lx GPR27: %08lx\n",
-              regs->gpr[24], regs->gpr[25], regs->gpr[26], regs->gpr[27]);
-       printk("GPR28: %08lx GPR29: %08lx GPR30: %08lx GPR31: %08lx\n",
-              regs->gpr[28], regs->gpr[29], regs->gpr[30], regs->gpr[31]);
-       printk("  RES: %08lx oGPR11: %08lx\n",
-              regs->gpr[11], regs->orig_gpr11);
-
-       printk("Process %s (pid: %d, stackpage=%08lx)\n",
-              ((struct task_struct *)(__pa(current)))->comm,
-              ((struct task_struct *)(__pa(current)))->pid,
-              (unsigned long)current);
-
-       printk("\nStack: ");
-       printk("Stack dump [0x%08lx]:\n", (unsigned long)stack);
-       for (i = 0; i < kstack_depth_to_print; i++) {
-               if (((long)stack & (THREAD_SIZE - 1)) == 0)
-                       break;
-               stack++;
-
-               printk("%lx :: sp + %02d: 0x%08lx\n", stack, i * 4,
-                      *((unsigned long *)(__pa(stack))));
-       }
-       printk("\n");
-
-       printk("Call Trace:   ");
-       i = 1;
-       while (((long)stack & (THREAD_SIZE - 1)) != 0) {
-               addr = *((unsigned long *)__pa(stack));
-               stack++;
-
-               if (kernel_text_address(addr)) {
-                       if (i && ((i % 6) == 0))
-                               printk("\n ");
-                       printk(" [<%08lx>]", addr);
-                       i++;
-               }
-       }
-       printk("\n");
-
-       printk("\nCode: ");
-
-       for (i = -24; i < 24; i++) {
-               unsigned long word;
-
-               word = ((unsigned long *)(__pa(regs->pc)))[i];
-
-               print_data(regs->pc, word, i);
-       }
-       printk("\n");
-}
-
 /* This is normally the 'Oops' routine */
 void __noreturn die(const char *str, struct pt_regs *regs, long err)
 {
index a9dcd43..29e232d 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/perf_event.h>
 
 #include <linux/uaccess.h>
+#include <asm/bug.h>
 #include <asm/mmu_context.h>
 #include <asm/siginfo.h>
 #include <asm/signal.h>
@@ -30,7 +31,8 @@
  */
 volatile pgd_t *current_pgd[NR_CPUS];
 
-extern void __noreturn die(char *, struct pt_regs *, long);
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
+                             unsigned long vector, int write_acc);
 
 /*
  * This routine handles page faults.  It determines the address,
index d531ab8..1dcd78c 100644 (file)
@@ -123,8 +123,6 @@ static void __init map_ram(void)
 
 void __init paging_init(void)
 {
-       extern void tlb_init(void);
-
        int i;
 
        printk(KERN_INFO "Setting up paging and PTEs.\n");
index 91c8259..f59ea4c 100644 (file)
@@ -22,7 +22,7 @@
 
 extern int mem_init_done;
 
-/**
+/*
  * OK, this one's a bit tricky... ioremap can get called before memory is
  * initialized (early serial console does this) and will want to alloc a page
  * for its mapping.  No userspace pages will ever get allocated before memory
index e2f2a3c..3115f2e 100644 (file)
@@ -182,12 +182,3 @@ void destroy_context(struct mm_struct *mm)
        flush_tlb_mm(mm);
 
 }
-
-/* called once during VM initialization, from init.c */
-
-void __init tlb_init(void)
-{
-       /* Do nothing... */
-       /* invalidate the entire TLB */
-       /* flush_tlb_all(); */
-}
diff --git a/arch/parisc/include/asm/ide.h b/arch/parisc/include/asm/ide.h
deleted file mode 100644 (file)
index 7aa75b9..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  linux/include/asm-parisc/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the PARISC architecture specific IDE code.
- */
-
-#ifndef __ASM_PARISC_IDE_H
-#define __ASM_PARISC_IDE_H
-
-/* Generic I/O and MEMIO string operations.  */
-
-#define __ide_insw     insw
-#define __ide_insl     insl
-#define __ide_outsw    outsw
-#define __ide_outsl    outsl
-
-static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               *(u16 *)addr = __raw_readw(port);
-               addr += 2;
-       }
-}
-
-static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               *(u32 *)addr = __raw_readl(port);
-               addr += 4;
-       }
-}
-
-static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               __raw_writew(*(u16 *)addr, port);
-               addr += 2;
-       }
-}
-
-static __inline__ void __ide_mm_outsl(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               __raw_writel(*(u32 *)addr, port);
-               addr += 4;
-       }
-}
-
-#endif /* __ASM_PARISC_IDE_H */
index 1c91a35..0d54e29 100644 (file)
@@ -1,3 +1,5 @@
+# Help: Common security options for PowerPC builds
+
 # This is the equivalent of booting with lockdown=integrity
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
@@ -12,4 +14,4 @@ CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
 
 # UBSAN bounds checking is very cheap and good for hardening
 CONFIG_UBSAN=y
-# CONFIG_UBSAN_MISC is not set
\ No newline at end of file
+# CONFIG_UBSAN_MISC is not set
diff --git a/arch/powerpc/include/asm/ide.h b/arch/powerpc/include/asm/ide.h
deleted file mode 100644 (file)
index ce87a44..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  Copyright (C) 1994-1996 Linus Torvalds & authors
- *
- *  This file contains the powerpc architecture specific IDE code.
- */
-#ifndef _ASM_POWERPC_IDE_H
-#define _ASM_POWERPC_IDE_H
-
-#include <linux/compiler.h>
-#include <asm/io.h>
-
-#define __ide_mm_insw(p, a, c) readsw((void __iomem *)(p), (a), (c))
-#define __ide_mm_insl(p, a, c) readsl((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsw(p, a, c)        writesw((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsl(p, a, c)        writesl((void __iomem *)(p), (a), (c))
-
-#endif /* _ASM_POWERPC_IDE_H */
index f6af0f7..16ee163 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Build a 32-bit image
 CONFIG_ARCH_RV32I=y
 CONFIG_32BIT=y
 # CONFIG_PORTABLE is not set
index 313edc5..d872a2d 100644 (file)
@@ -1,2 +1,3 @@
+# Help: Build a 64-bit image
 CONFIG_ARCH_RV64I=y
 CONFIG_64BIT=y
index 7bac43a..777cb82 100644 (file)
@@ -54,6 +54,7 @@
 #ifndef CONFIG_64BIT
 #define SATP_PPN       _AC(0x003FFFFF, UL)
 #define SATP_MODE_32   _AC(0x80000000, UL)
+#define SATP_MODE_SHIFT        31
 #define SATP_ASID_BITS 9
 #define SATP_ASID_SHIFT        22
 #define SATP_ASID_MASK _AC(0x1FF, UL)
@@ -62,6 +63,7 @@
 #define SATP_MODE_39   _AC(0x8000000000000000, UL)
 #define SATP_MODE_48   _AC(0x9000000000000000, UL)
 #define SATP_MODE_57   _AC(0xa000000000000000, UL)
+#define SATP_MODE_SHIFT        60
 #define SATP_ASID_BITS 16
 #define SATP_ASID_SHIFT        44
 #define SATP_ASID_MASK _AC(0xFFFF, UL)
index 2d8ee53..1ebf20d 100644 (file)
@@ -337,6 +337,15 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
 
+void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu);
+unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
+                                   u64 __user *uindices);
+int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+                          const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+                          const struct kvm_one_reg *reg);
+
 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
index ff994fd..27f5bcc 100644 (file)
@@ -74,9 +74,7 @@ static inline void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
 #endif
 
 int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg,
-                                 unsigned long rtype);
+                                 const struct kvm_one_reg *reg);
 int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg,
-                                 unsigned long rtype);
+                                 const struct kvm_one_reg *reg);
 #endif
index 930fdc4..992c5e4 100644 (file)
@@ -55,6 +55,7 @@ struct kvm_riscv_config {
        unsigned long marchid;
        unsigned long mimpid;
        unsigned long zicboz_block_size;
+       unsigned long satp_mode;
 };
 
 /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -124,6 +125,12 @@ enum KVM_RISCV_ISA_EXT_ID {
        KVM_RISCV_ISA_EXT_SSAIA,
        KVM_RISCV_ISA_EXT_V,
        KVM_RISCV_ISA_EXT_SVNAPOT,
+       KVM_RISCV_ISA_EXT_ZBA,
+       KVM_RISCV_ISA_EXT_ZBS,
+       KVM_RISCV_ISA_EXT_ZICNTR,
+       KVM_RISCV_ISA_EXT_ZICSR,
+       KVM_RISCV_ISA_EXT_ZIFENCEI,
+       KVM_RISCV_ISA_EXT_ZIHPM,
        KVM_RISCV_ISA_EXT_MAX,
 };
 
@@ -193,6 +200,15 @@ enum KVM_RISCV_SBI_EXT_ID {
 
 /* ISA Extension registers are mapped as type 7 */
 #define KVM_REG_RISCV_ISA_EXT          (0x07 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_SINGLE       (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_EN     (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_DIS    (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_REG(__ext_id)  \
+               ((__ext_id) / __BITS_PER_LONG)
+#define KVM_REG_RISCV_ISA_MULTI_MASK(__ext_id) \
+               (1UL << ((__ext_id) % __BITS_PER_LONG))
+#define KVM_REG_RISCV_ISA_MULTI_REG_LAST       \
+               KVM_REG_RISCV_ISA_MULTI_REG(KVM_RISCV_ISA_EXT_MAX - 1)
 
 /* SBI extension registers are mapped as type 8 */
 #define KVM_REG_RISCV_SBI_EXT          (0x08 << KVM_REG_RISCV_TYPE_SHIFT)
index fee0671..4c2067f 100644 (file)
@@ -19,6 +19,7 @@ kvm-y += vcpu_exit.o
 kvm-y += vcpu_fp.o
 kvm-y += vcpu_vector.o
 kvm-y += vcpu_insn.o
+kvm-y += vcpu_onereg.o
 kvm-y += vcpu_switch.o
 kvm-y += vcpu_sbi.o
 kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
index 585a3b4..74bb274 100644 (file)
@@ -176,7 +176,7 @@ int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu,
        struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
 
        if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
-               return -EINVAL;
+               return -ENOENT;
 
        *out_val = 0;
        if (kvm_riscv_aia_available())
@@ -192,7 +192,7 @@ int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu,
        struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
 
        if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
-               return -EINVAL;
+               return -ENOENT;
 
        if (kvm_riscv_aia_available()) {
                ((unsigned long *)csr)[reg_num] = val;
index f2eb479..068c745 100644 (file)
@@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
 {
 }
@@ -559,7 +553,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
        int ret;
-       kvm_pfn_t pfn = pte_pfn(range->pte);
+       kvm_pfn_t pfn = pte_pfn(range->arg.pte);
 
        if (!kvm->arch.pgd)
                return false;
index d12ef99..82229db 100644 (file)
 #include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
-#include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/sched/signal.h>
 #include <linux/fs.h>
 #include <linux/kvm_host.h>
 #include <asm/csr.h>
 #include <asm/cacheflush.h>
-#include <asm/hwcap.h>
-#include <asm/sbi.h>
-#include <asm/vector.h>
 #include <asm/kvm_vcpu_vector.h>
 
 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
@@ -46,79 +42,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
                       sizeof(kvm_vcpu_stats_desc),
 };
 
-#define KVM_RISCV_BASE_ISA_MASK                GENMASK(25, 0)
-
-#define KVM_ISA_EXT_ARR(ext)           [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
-
-/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
-static const unsigned long kvm_isa_ext_arr[] = {
-       [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
-       [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
-       [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
-       [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
-       [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
-       [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
-       [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
-       [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
-
-       KVM_ISA_EXT_ARR(SSAIA),
-       KVM_ISA_EXT_ARR(SSTC),
-       KVM_ISA_EXT_ARR(SVINVAL),
-       KVM_ISA_EXT_ARR(SVNAPOT),
-       KVM_ISA_EXT_ARR(SVPBMT),
-       KVM_ISA_EXT_ARR(ZBB),
-       KVM_ISA_EXT_ARR(ZIHINTPAUSE),
-       KVM_ISA_EXT_ARR(ZICBOM),
-       KVM_ISA_EXT_ARR(ZICBOZ),
-};
-
-static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
-{
-       unsigned long i;
-
-       for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
-               if (kvm_isa_ext_arr[i] == base_ext)
-                       return i;
-       }
-
-       return KVM_RISCV_ISA_EXT_MAX;
-}
-
-static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
-{
-       switch (ext) {
-       case KVM_RISCV_ISA_EXT_H:
-               return false;
-       case KVM_RISCV_ISA_EXT_V:
-               return riscv_v_vstate_ctrl_user_allowed();
-       default:
-               break;
-       }
-
-       return true;
-}
-
-static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
-{
-       switch (ext) {
-       case KVM_RISCV_ISA_EXT_A:
-       case KVM_RISCV_ISA_EXT_C:
-       case KVM_RISCV_ISA_EXT_I:
-       case KVM_RISCV_ISA_EXT_M:
-       case KVM_RISCV_ISA_EXT_SSAIA:
-       case KVM_RISCV_ISA_EXT_SSTC:
-       case KVM_RISCV_ISA_EXT_SVINVAL:
-       case KVM_RISCV_ISA_EXT_SVNAPOT:
-       case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
-       case KVM_RISCV_ISA_EXT_ZBB:
-               return false;
-       default:
-               break;
-       }
-
-       return true;
-}
-
 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
 {
        struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
@@ -176,7 +99,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        int rc;
        struct kvm_cpu_context *cntx;
        struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
-       unsigned long host_isa, i;
 
        /* Mark this VCPU never ran */
        vcpu->arch.ran_atleast_once = false;
@@ -184,12 +106,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
 
        /* Setup ISA features available to VCPU */
-       for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
-               host_isa = kvm_isa_ext_arr[i];
-               if (__riscv_isa_extension_available(NULL, host_isa) &&
-                   kvm_riscv_vcpu_isa_enable_allowed(i))
-                       set_bit(host_isa, vcpu->arch.isa);
-       }
+       kvm_riscv_vcpu_setup_isa(vcpu);
 
        /* Setup vendor, arch, and implementation details */
        vcpu->arch.mvendorid = sbi_get_mvendorid();
@@ -294,450 +211,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
        return VM_FAULT_SIGBUS;
 }
 
-static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
-                                        const struct kvm_one_reg *reg)
-{
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CONFIG);
-       unsigned long reg_val;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       switch (reg_num) {
-       case KVM_REG_RISCV_CONFIG_REG(isa):
-               reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
-               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
-                       return -EINVAL;
-               reg_val = riscv_cbom_block_size;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
-               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
-                       return -EINVAL;
-               reg_val = riscv_cboz_block_size;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(mvendorid):
-               reg_val = vcpu->arch.mvendorid;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(marchid):
-               reg_val = vcpu->arch.marchid;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(mimpid):
-               reg_val = vcpu->arch.mimpid;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
-                                        const struct kvm_one_reg *reg)
-{
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CONFIG);
-       unsigned long i, isa_ext, reg_val;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       switch (reg_num) {
-       case KVM_REG_RISCV_CONFIG_REG(isa):
-               /*
-                * This ONE REG interface is only defined for
-                * single letter extensions.
-                */
-               if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
-                       return -EINVAL;
-
-               if (!vcpu->arch.ran_atleast_once) {
-                       /* Ignore the enable/disable request for certain extensions */
-                       for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
-                               isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
-                               if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
-                                       reg_val &= ~BIT(i);
-                                       continue;
-                               }
-                               if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
-                                       if (reg_val & BIT(i))
-                                               reg_val &= ~BIT(i);
-                               if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
-                                       if (!(reg_val & BIT(i)))
-                                               reg_val |= BIT(i);
-                       }
-                       reg_val &= riscv_isa_extension_base(NULL);
-                       /* Do not modify anything beyond single letter extensions */
-                       reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
-                                 (reg_val & KVM_RISCV_BASE_ISA_MASK);
-                       vcpu->arch.isa[0] = reg_val;
-                       kvm_riscv_vcpu_fp_reset(vcpu);
-               } else {
-                       return -EOPNOTSUPP;
-               }
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
-               return -EOPNOTSUPP;
-       case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
-               return -EOPNOTSUPP;
-       case KVM_REG_RISCV_CONFIG_REG(mvendorid):
-               if (!vcpu->arch.ran_atleast_once)
-                       vcpu->arch.mvendorid = reg_val;
-               else
-                       return -EBUSY;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(marchid):
-               if (!vcpu->arch.ran_atleast_once)
-                       vcpu->arch.marchid = reg_val;
-               else
-                       return -EBUSY;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(mimpid):
-               if (!vcpu->arch.ran_atleast_once)
-                       vcpu->arch.mimpid = reg_val;
-               else
-                       return -EBUSY;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
-                                      const struct kvm_one_reg *reg)
-{
-       struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CORE);
-       unsigned long reg_val;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-       if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
-               reg_val = cntx->sepc;
-       else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
-                reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
-               reg_val = ((unsigned long *)cntx)[reg_num];
-       else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
-               reg_val = (cntx->sstatus & SR_SPP) ?
-                               KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
-       else
-               return -EINVAL;
-
-       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
-                                      const struct kvm_one_reg *reg)
-{
-       struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CORE);
-       unsigned long reg_val;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-       if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
-               return -EINVAL;
-
-       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
-               cntx->sepc = reg_val;
-       else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
-                reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
-               ((unsigned long *)cntx)[reg_num] = reg_val;
-       else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
-               if (reg_val == KVM_RISCV_MODE_S)
-                       cntx->sstatus |= SR_SPP;
-               else
-                       cntx->sstatus &= ~SR_SPP;
-       } else
-               return -EINVAL;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
-                                         unsigned long reg_num,
-                                         unsigned long *out_val)
-{
-       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-
-       if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
-               kvm_riscv_vcpu_flush_interrupts(vcpu);
-               *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
-               *out_val |= csr->hvip & ~IRQ_LOCAL_MASK;
-       } else
-               *out_val = ((unsigned long *)csr)[reg_num];
-
-       return 0;
-}
-
-static inline int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
-                                                unsigned long reg_num,
-                                                unsigned long reg_val)
-{
-       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-
-       if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
-               reg_val &= VSIP_VALID_MASK;
-               reg_val <<= VSIP_TO_HVIP_SHIFT;
-       }
-
-       ((unsigned long *)csr)[reg_num] = reg_val;
-
-       if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
-               WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
-                                     const struct kvm_one_reg *reg)
-{
-       int rc;
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CSR);
-       unsigned long reg_val, reg_subtype;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
-       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
-       switch (reg_subtype) {
-       case KVM_REG_RISCV_CSR_GENERAL:
-               rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &reg_val);
-               break;
-       case KVM_REG_RISCV_CSR_AIA:
-               rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &reg_val);
-               break;
-       default:
-               rc = -EINVAL;
-               break;
-       }
-       if (rc)
-               return rc;
-
-       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
-                                     const struct kvm_one_reg *reg)
-{
-       int rc;
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CSR);
-       unsigned long reg_val, reg_subtype;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
-       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
-       switch (reg_subtype) {
-       case KVM_REG_RISCV_CSR_GENERAL:
-               rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val);
-               break;
-       case KVM_REG_RISCV_CSR_AIA:
-               rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val);
-               break;
-       default:
-               rc = -EINVAL;
-               break;
-       }
-       if (rc)
-               return rc;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
-                                         const struct kvm_one_reg *reg)
-{
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_ISA_EXT);
-       unsigned long reg_val = 0;
-       unsigned long host_isa_ext;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
-           reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
-               return -EINVAL;
-
-       host_isa_ext = kvm_isa_ext_arr[reg_num];
-       if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
-               reg_val = 1; /* Mark the given extension as available */
-
-       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
-                                         const struct kvm_one_reg *reg)
-{
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_ISA_EXT);
-       unsigned long reg_val;
-       unsigned long host_isa_ext;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
-           reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
-               return -EINVAL;
-
-       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       host_isa_ext = kvm_isa_ext_arr[reg_num];
-       if (!__riscv_isa_extension_available(NULL, host_isa_ext))
-               return  -EOPNOTSUPP;
-
-       if (!vcpu->arch.ran_atleast_once) {
-               /*
-                * All multi-letter extension and a few single letter
-                * extension can be disabled
-                */
-               if (reg_val == 1 &&
-                   kvm_riscv_vcpu_isa_enable_allowed(reg_num))
-                       set_bit(host_isa_ext, vcpu->arch.isa);
-               else if (!reg_val &&
-                        kvm_riscv_vcpu_isa_disable_allowed(reg_num))
-                       clear_bit(host_isa_ext, vcpu->arch.isa);
-               else
-                       return -EINVAL;
-               kvm_riscv_vcpu_fp_reset(vcpu);
-       } else {
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg)
-{
-       switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
-       case KVM_REG_RISCV_CONFIG:
-               return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
-       case KVM_REG_RISCV_CORE:
-               return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
-       case KVM_REG_RISCV_CSR:
-               return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
-       case KVM_REG_RISCV_TIMER:
-               return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
-       case KVM_REG_RISCV_FP_F:
-               return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
-                                                KVM_REG_RISCV_FP_F);
-       case KVM_REG_RISCV_FP_D:
-               return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
-                                                KVM_REG_RISCV_FP_D);
-       case KVM_REG_RISCV_ISA_EXT:
-               return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
-       case KVM_REG_RISCV_SBI_EXT:
-               return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
-       case KVM_REG_RISCV_VECTOR:
-               return kvm_riscv_vcpu_set_reg_vector(vcpu, reg,
-                                                KVM_REG_RISCV_VECTOR);
-       default:
-               break;
-       }
-
-       return -EINVAL;
-}
-
-static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg)
-{
-       switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
-       case KVM_REG_RISCV_CONFIG:
-               return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
-       case KVM_REG_RISCV_CORE:
-               return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
-       case KVM_REG_RISCV_CSR:
-               return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
-       case KVM_REG_RISCV_TIMER:
-               return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
-       case KVM_REG_RISCV_FP_F:
-               return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
-                                                KVM_REG_RISCV_FP_F);
-       case KVM_REG_RISCV_FP_D:
-               return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
-                                                KVM_REG_RISCV_FP_D);
-       case KVM_REG_RISCV_ISA_EXT:
-               return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
-       case KVM_REG_RISCV_SBI_EXT:
-               return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
-       case KVM_REG_RISCV_VECTOR:
-               return kvm_riscv_vcpu_get_reg_vector(vcpu, reg,
-                                                KVM_REG_RISCV_VECTOR);
-       default:
-               break;
-       }
-
-       return -EINVAL;
-}
-
 long kvm_arch_vcpu_async_ioctl(struct file *filp,
                               unsigned int ioctl, unsigned long arg)
 {
@@ -781,6 +254,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                        r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
                break;
        }
+       case KVM_GET_REG_LIST: {
+               struct kvm_reg_list __user *user_list = argp;
+               struct kvm_reg_list reg_list;
+               unsigned int n;
+
+               r = -EFAULT;
+               if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
+                       break;
+               n = reg_list.n;
+               reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
+               if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
+                       break;
+               r = -E2BIG;
+               if (n < reg_list.n)
+                       break;
+               r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
+               break;
+       }
        default:
                break;
        }
index 9d8cbc4..08ba48a 100644 (file)
@@ -96,7 +96,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
                          reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
                        reg_val = &cntx->fp.f.f[reg_num];
                else
-                       return -EINVAL;
+                       return -ENOENT;
        } else if ((rtype == KVM_REG_RISCV_FP_D) &&
                   riscv_isa_extension_available(vcpu->arch.isa, d)) {
                if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
@@ -109,9 +109,9 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
                                return -EINVAL;
                        reg_val = &cntx->fp.d.f[reg_num];
                } else
-                       return -EINVAL;
+                       return -ENOENT;
        } else
-               return -EINVAL;
+               return -ENOENT;
 
        if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
                return -EFAULT;
@@ -141,7 +141,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
                          reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
                        reg_val = &cntx->fp.f.f[reg_num];
                else
-                       return -EINVAL;
+                       return -ENOENT;
        } else if ((rtype == KVM_REG_RISCV_FP_D) &&
                   riscv_isa_extension_available(vcpu->arch.isa, d)) {
                if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
@@ -154,9 +154,9 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
                                return -EINVAL;
                        reg_val = &cntx->fp.d.f[reg_num];
                } else
-                       return -EINVAL;
+                       return -ENOENT;
        } else
-               return -EINVAL;
+               return -ENOENT;
 
        if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id)))
                return -EFAULT;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
new file mode 100644 (file)
index 0000000..1b7e9fa
--- /dev/null
@@ -0,0 +1,1051 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2023 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ *     Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <linux/kvm_host.h>
+#include <asm/cacheflush.h>
+#include <asm/hwcap.h>
+#include <asm/kvm_vcpu_vector.h>
+#include <asm/vector.h>
+
+/*
+ * Bits 0..25 of vcpu->arch.isa[0]: the part of the ISA bitmap that is
+ * exposed through the "isa" config register.
+ */
+#define KVM_RISCV_BASE_ISA_MASK                GENMASK(25, 0)
+
+/*
+ * Designated initializer for one kvm_isa_ext_arr[] slot: the entry at index
+ * KVM_RISCV_ISA_EXT_<ext> is set to RISCV_ISA_EXT_<ext>.
+ */
+#define KVM_ISA_EXT_ARR(ext)           \
+[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
+
+/*
+ * Mapping between KVM ISA Extension ID & Host ISA extension ID.
+ *
+ * The table is indexed by KVM_RISCV_ISA_EXT_* and each entry holds the
+ * matching host RISCV_ISA_EXT_* value.
+ */
+static const unsigned long kvm_isa_ext_arr[] = {
+       /* Single letter extensions (alphabetically sorted) */
+       [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
+       [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
+       [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
+       [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
+       [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
+       [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
+       [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
+       [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
+       /* Multi letter extensions (alphabetically sorted) */
+       KVM_ISA_EXT_ARR(SSAIA),
+       KVM_ISA_EXT_ARR(SSTC),
+       KVM_ISA_EXT_ARR(SVINVAL),
+       KVM_ISA_EXT_ARR(SVNAPOT),
+       KVM_ISA_EXT_ARR(SVPBMT),
+       KVM_ISA_EXT_ARR(ZBA),
+       KVM_ISA_EXT_ARR(ZBB),
+       KVM_ISA_EXT_ARR(ZBS),
+       KVM_ISA_EXT_ARR(ZICBOM),
+       KVM_ISA_EXT_ARR(ZICBOZ),
+       KVM_ISA_EXT_ARR(ZICNTR),
+       KVM_ISA_EXT_ARR(ZICSR),
+       KVM_ISA_EXT_ARR(ZIFENCEI),
+       KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+       KVM_ISA_EXT_ARR(ZIHPM),
+};
+
+/*
+ * Translate a host ISA extension ID into the matching KVM ISA extension ID.
+ *
+ * Returns the index of the first kvm_isa_ext_arr[] entry equal to @base_ext,
+ * or KVM_RISCV_ISA_EXT_MAX when no entry matches.
+ */
+static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
+{
+       unsigned long i;
+
+       /*
+        * Bound the scan by the table size as well as KVM_RISCV_ISA_EXT_MAX,
+        * as riscv_vcpu_get_isa_ext_single() does, so the table is never
+        * read past its end should it be shorter than KVM_RISCV_ISA_EXT_MAX.
+        */
+       for (i = 0; i < KVM_RISCV_ISA_EXT_MAX &&
+                   i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
+               if (kvm_isa_ext_arr[i] == base_ext)
+                       return i;
+       }
+
+       return KVM_RISCV_ISA_EXT_MAX;
+}
+
+/*
+ * Tell whether the KVM ISA extension @ext may be enabled for a VCPU.
+ *
+ * H is never allowed, V follows riscv_v_vstate_ctrl_user_allowed(), and
+ * every other extension is allowed.
+ */
+static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
+{
+       if (ext == KVM_RISCV_ISA_EXT_H)
+               return false;
+
+       if (ext == KVM_RISCV_ISA_EXT_V)
+               return riscv_v_vstate_ctrl_user_allowed();
+
+       return true;
+}
+
+/*
+ * Tell whether the KVM ISA extension @ext may be disabled for a VCPU.
+ *
+ * The extensions listed below cannot be disabled; anything else can.
+ */
+static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
+{
+       bool allowed;
+
+       switch (ext) {
+       case KVM_RISCV_ISA_EXT_A:
+       case KVM_RISCV_ISA_EXT_C:
+       case KVM_RISCV_ISA_EXT_I:
+       case KVM_RISCV_ISA_EXT_M:
+       case KVM_RISCV_ISA_EXT_SSAIA:
+       case KVM_RISCV_ISA_EXT_SSTC:
+       case KVM_RISCV_ISA_EXT_SVINVAL:
+       case KVM_RISCV_ISA_EXT_SVNAPOT:
+       case KVM_RISCV_ISA_EXT_ZBA:
+       case KVM_RISCV_ISA_EXT_ZBB:
+       case KVM_RISCV_ISA_EXT_ZBS:
+       case KVM_RISCV_ISA_EXT_ZICNTR:
+       case KVM_RISCV_ISA_EXT_ZICSR:
+       case KVM_RISCV_ISA_EXT_ZIFENCEI:
+       case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+       case KVM_RISCV_ISA_EXT_ZIHPM:
+               allowed = false;
+               break;
+       default:
+               allowed = true;
+               break;
+       }
+
+       return allowed;
+}
+
+/*
+ * Populate vcpu->arch.isa: set the bit of every extension in
+ * kvm_isa_ext_arr[] that the host reports as available and that
+ * kvm_riscv_vcpu_isa_enable_allowed() permits.
+ */
+void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu)
+{
+       unsigned long kvm_ext, host_ext;
+
+       for (kvm_ext = 0; kvm_ext < ARRAY_SIZE(kvm_isa_ext_arr); kvm_ext++) {
+               host_ext = kvm_isa_ext_arr[kvm_ext];
+
+               if (!__riscv_isa_extension_available(NULL, host_ext))
+                       continue;
+               if (!kvm_riscv_vcpu_isa_enable_allowed(kvm_ext))
+                       continue;
+
+               set_bit(host_ext, vcpu->arch.isa);
+       }
+}
+
+/*
+ * Read one CONFIG register and copy its value to the user buffer at
+ * reg->addr.
+ *
+ * Returns 0 on success, -EINVAL when the size encoded in reg->id is not
+ * sizeof(unsigned long), -ENOENT for an unknown register or for a block
+ * size register whose extension is not set in vcpu->arch.isa, and -EFAULT
+ * when the copy to user space fails.
+ */
+static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
+                                        const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       /* Strip the arch, size and register-type bits from the id */
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_CONFIG);
+       unsigned long reg_val;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       switch (reg_num) {
+       case KVM_REG_RISCV_CONFIG_REG(isa):
+               /* Only the bits covered by KVM_RISCV_BASE_ISA_MASK are reported */
+               reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+                       return -ENOENT;
+               reg_val = riscv_cbom_block_size;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+                       return -ENOENT;
+               reg_val = riscv_cboz_block_size;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+               reg_val = vcpu->arch.mvendorid;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(marchid):
+               reg_val = vcpu->arch.marchid;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(mimpid):
+               reg_val = vcpu->arch.mimpid;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+               reg_val = satp_mode >> SATP_MODE_SHIFT;
+               break;
+       default:
+               return -ENOENT;
+       }
+
+       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       return 0;
+}
+
+/*
+ * Write one CONFIG register from the user buffer at reg->addr.
+ *
+ * The isa, mvendorid, marchid and mimpid registers can only be changed
+ * before the VCPU has run; writing back the current value is always
+ * accepted. The block size and satp_mode registers only accept the value
+ * they already report.
+ *
+ * Returns 0 on success, -EINVAL for a size mismatch or an unacceptable
+ * value, -EFAULT when the copy from user space fails, -EBUSY when a change
+ * is requested after the VCPU has run, and -ENOENT for an unknown register
+ * or a block size register whose extension is not set in vcpu->arch.isa.
+ */
+static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
+                                        const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_CONFIG);
+       unsigned long i, isa_ext, reg_val;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       switch (reg_num) {
+       case KVM_REG_RISCV_CONFIG_REG(isa):
+               /*
+                * This ONE REG interface is only defined for
+                * single letter extensions.
+                *
+                * reg_val is an unsigned long, so use fls_long(): plain
+                * fls() takes an unsigned int and would not see bits 32
+                * and above of a 64-bit value.
+                */
+               if (fls_long(reg_val) >= RISCV_ISA_EXT_BASE)
+                       return -EINVAL;
+
+               /*
+                * Return early (i.e. do nothing) if reg_val is the same
+                * value retrievable via kvm_riscv_vcpu_get_reg_config().
+                */
+               if (reg_val == (vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK))
+                       break;
+
+               if (vcpu->arch.ran_atleast_once)
+                       return -EBUSY;
+
+               /* Ignore the enable/disable request for certain extensions */
+               for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
+                       isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
+                       if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
+                               /* Not an extension KVM knows about */
+                               reg_val &= ~BIT(i);
+                               continue;
+                       }
+                       /* Force off what must not be enabled ... */
+                       if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
+                               reg_val &= ~BIT(i);
+                       /* ... and force on what must not be disabled */
+                       if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
+                               reg_val |= BIT(i);
+               }
+               reg_val &= riscv_isa_extension_base(NULL);
+               /* Do not modify anything beyond single letter extensions */
+               reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
+                         (reg_val & KVM_RISCV_BASE_ISA_MASK);
+               vcpu->arch.isa[0] = reg_val;
+               kvm_riscv_vcpu_fp_reset(vcpu);
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+                       return -ENOENT;
+               if (reg_val != riscv_cbom_block_size)
+                       return -EINVAL;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+                       return -ENOENT;
+               if (reg_val != riscv_cboz_block_size)
+                       return -EINVAL;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+               if (reg_val == vcpu->arch.mvendorid)
+                       break;
+               if (!vcpu->arch.ran_atleast_once)
+                       vcpu->arch.mvendorid = reg_val;
+               else
+                       return -EBUSY;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(marchid):
+               if (reg_val == vcpu->arch.marchid)
+                       break;
+               if (!vcpu->arch.ran_atleast_once)
+                       vcpu->arch.marchid = reg_val;
+               else
+                       return -EBUSY;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(mimpid):
+               if (reg_val == vcpu->arch.mimpid)
+                       break;
+               if (!vcpu->arch.ran_atleast_once)
+                       vcpu->arch.mimpid = reg_val;
+               else
+                       return -EBUSY;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+               if (reg_val != (satp_mode >> SATP_MODE_SHIFT))
+                       return -EINVAL;
+               break;
+       default:
+               return -ENOENT;
+       }
+
+       return 0;
+}
+
+/*
+ * Read one CORE register (regs.pc, one of the registers up to regs.t6, or
+ * mode) and copy it to the user buffer at reg->addr.
+ *
+ * Returns 0 on success, -EINVAL on a size mismatch, -ENOENT for an unknown
+ * register number and -EFAULT when the copy to user space fails.
+ */
+static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
+                                      const struct kvm_one_reg *reg)
+{
+       struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long idx = reg->id & ~(KVM_REG_ARCH_MASK |
+                                       KVM_REG_SIZE_MASK |
+                                       KVM_REG_RISCV_CORE);
+       unsigned long val;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+       if (idx >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+               return -ENOENT;
+
+       if (idx == KVM_REG_RISCV_CORE_REG(regs.pc)) {
+               /* The pc register is served from the context's sepc field */
+               val = cntx->sepc;
+       } else if (idx > KVM_REG_RISCV_CORE_REG(regs.pc) &&
+                  idx <= KVM_REG_RISCV_CORE_REG(regs.t6)) {
+               /* The context is indexed as an array of unsigned long */
+               val = ((unsigned long *)cntx)[idx];
+       } else if (idx == KVM_REG_RISCV_CORE_REG(mode)) {
+               if (cntx->sstatus & SR_SPP)
+                       val = KVM_RISCV_MODE_S;
+               else
+                       val = KVM_RISCV_MODE_U;
+       } else {
+               return -ENOENT;
+       }
+
+       if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       return 0;
+}
+
+/*
+ * Write one CORE register (regs.pc, one of the registers up to regs.t6, or
+ * mode) from the user buffer at reg->addr.
+ *
+ * Returns 0 on success, -EINVAL on a size mismatch, -ENOENT for an unknown
+ * register number and -EFAULT when the copy from user space fails.
+ */
+static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
+                                      const struct kvm_one_reg *reg)
+{
+       struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long idx = reg->id & ~(KVM_REG_ARCH_MASK |
+                                       KVM_REG_SIZE_MASK |
+                                       KVM_REG_RISCV_CORE);
+       unsigned long val;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+       if (idx >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+               return -ENOENT;
+
+       if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       if (idx == KVM_REG_RISCV_CORE_REG(regs.pc)) {
+               /* The pc register is stored in the context's sepc field */
+               cntx->sepc = val;
+       } else if (idx > KVM_REG_RISCV_CORE_REG(regs.pc) &&
+                  idx <= KVM_REG_RISCV_CORE_REG(regs.t6)) {
+               /* The context is indexed as an array of unsigned long */
+               ((unsigned long *)cntx)[idx] = val;
+       } else if (idx == KVM_REG_RISCV_CORE_REG(mode)) {
+               /* Any value other than KVM_RISCV_MODE_S clears SR_SPP */
+               if (val == KVM_RISCV_MODE_S)
+                       cntx->sstatus |= SR_SPP;
+               else
+                       cntx->sstatus &= ~SR_SPP;
+       } else {
+               return -ENOENT;
+       }
+
+       return 0;
+}
+
+/*
+ * Fetch the general CSR numbered @reg_num into @out_val.
+ *
+ * sip is derived from hvip after flushing pending interrupts; every other
+ * register is read straight from vcpu->arch.guest_csr.
+ *
+ * Returns 0 on success or -ENOENT when @reg_num is out of range.
+ */
+static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
+                                         unsigned long reg_num,
+                                         unsigned long *out_val)
+{
+       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+       unsigned long val;
+
+       if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+               return -ENOENT;
+
+       if (reg_num != KVM_REG_RISCV_CSR_REG(sip)) {
+               *out_val = ((unsigned long *)csr)[reg_num];
+               return 0;
+       }
+
+       kvm_riscv_vcpu_flush_interrupts(vcpu);
+       val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
+       val |= csr->hvip & ~IRQ_LOCAL_MASK;
+       *out_val = val;
+
+       return 0;
+}
+
+/*
+ * Store @reg_val into the general CSR numbered @reg_num.
+ *
+ * A write to sip is masked with VSIP_VALID_MASK and shifted by
+ * VSIP_TO_HVIP_SHIFT before being stored, and additionally clears
+ * vcpu->arch.irqs_pending_mask[0].
+ *
+ * Returns 0 on success or -ENOENT when @reg_num is out of range.
+ */
+static int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
+                                         unsigned long reg_num,
+                                         unsigned long reg_val)
+{
+       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+       bool is_sip;
+
+       if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+               return -ENOENT;
+
+       is_sip = (reg_num == KVM_REG_RISCV_CSR_REG(sip));
+       if (is_sip)
+               reg_val = (reg_val & VSIP_VALID_MASK) << VSIP_TO_HVIP_SHIFT;
+
+       ((unsigned long *)csr)[reg_num] = reg_val;
+
+       if (is_sip)
+               WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
+
+       return 0;
+}
+
+/*
+ * Read one CSR register and copy it to the user buffer at reg->addr.
+ *
+ * The subtype bits of the register id select between the general and the
+ * AIA CSR accessors.
+ *
+ * Returns 0 on success, -EINVAL on a size mismatch, -ENOENT for an unknown
+ * subtype, the accessor's error code if it fails, and -EFAULT when the copy
+ * to user space fails.
+ */
+static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
+                                     const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long id = reg->id & ~(KVM_REG_ARCH_MASK |
+                                      KVM_REG_SIZE_MASK |
+                                      KVM_REG_RISCV_CSR);
+       unsigned long subtype = id & KVM_REG_RISCV_SUBTYPE_MASK;
+       unsigned long csr_num = id & ~KVM_REG_RISCV_SUBTYPE_MASK;
+       unsigned long val;
+       int rc;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       switch (subtype) {
+       case KVM_REG_RISCV_CSR_GENERAL:
+               rc = kvm_riscv_vcpu_general_get_csr(vcpu, csr_num, &val);
+               break;
+       case KVM_REG_RISCV_CSR_AIA:
+               rc = kvm_riscv_vcpu_aia_get_csr(vcpu, csr_num, &val);
+               break;
+       default:
+               return -ENOENT;
+       }
+       if (rc)
+               return rc;
+
+       if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       return 0;
+}
+
+/*
+ * Write one CSR register from the user buffer at reg->addr.
+ *
+ * The subtype bits of the register id select between the general and the
+ * AIA CSR accessors.
+ *
+ * Returns 0 on success, -EINVAL on a size mismatch, -EFAULT when the copy
+ * from user space fails, -ENOENT for an unknown subtype, or the accessor's
+ * error code.
+ */
+static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
+                                     const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long id = reg->id & ~(KVM_REG_ARCH_MASK |
+                                      KVM_REG_SIZE_MASK |
+                                      KVM_REG_RISCV_CSR);
+       unsigned long subtype = id & KVM_REG_RISCV_SUBTYPE_MASK;
+       unsigned long csr_num = id & ~KVM_REG_RISCV_SUBTYPE_MASK;
+       unsigned long val;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       switch (subtype) {
+       case KVM_REG_RISCV_CSR_GENERAL:
+               return kvm_riscv_vcpu_general_set_csr(vcpu, csr_num, val);
+       case KVM_REG_RISCV_CSR_AIA:
+               return kvm_riscv_vcpu_aia_set_csr(vcpu, csr_num, val);
+       default:
+               break;
+       }
+
+       return -ENOENT;
+}
+
+/*
+ * Report whether the KVM ISA extension @reg_num is enabled for @vcpu.
+ *
+ * On success *@reg_val is 1 when the extension is set in vcpu->arch.isa
+ * and 0 otherwise.
+ *
+ * Returns 0 on success, or -ENOENT when @reg_num is out of range or the
+ * host does not have the extension. The latter matches
+ * riscv_vcpu_set_isa_ext_single(), so a register that cannot be written
+ * is not readable either.
+ */
+static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu,
+                                        unsigned long reg_num,
+                                        unsigned long *reg_val)
+{
+       unsigned long host_isa_ext;
+
+       if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+           reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+               return -ENOENT;
+
+       host_isa_ext = kvm_isa_ext_arr[reg_num];
+       if (!__riscv_isa_extension_available(NULL, host_isa_ext))
+               return -ENOENT;
+
+       *reg_val = 0;
+       if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
+               *reg_val = 1; /* Mark the given extension as available */
+
+       return 0;
+}
+
+/*
+ * Enable (@reg_val == 1) or disable (@reg_val == 0) the KVM ISA extension
+ * @reg_num for @vcpu.
+ *
+ * Returns 0 on success or when the requested state is already in effect,
+ * -ENOENT when @reg_num is out of range or the host lacks the extension,
+ * -EBUSY when the VCPU has already run, and -EINVAL when the change is not
+ * permitted or @reg_val is neither 0 nor 1.
+ */
+static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
+                                        unsigned long reg_num,
+                                        unsigned long reg_val)
+{
+       unsigned long host_ext;
+
+       if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+           reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+               return -ENOENT;
+
+       host_ext = kvm_isa_ext_arr[reg_num];
+       if (!__riscv_isa_extension_available(NULL, host_ext))
+               return -ENOENT;
+
+       /* Nothing to do when the requested state is already in effect */
+       if (reg_val == test_bit(host_ext, vcpu->arch.isa))
+               return 0;
+
+       if (vcpu->arch.ran_atleast_once)
+               return -EBUSY;
+
+       /*
+        * Whether an extension may be enabled or disabled is decided by
+        * kvm_riscv_vcpu_isa_enable_allowed() and
+        * kvm_riscv_vcpu_isa_disable_allowed().
+        */
+       if (reg_val == 1 && kvm_riscv_vcpu_isa_enable_allowed(reg_num)) {
+               set_bit(host_ext, vcpu->arch.isa);
+       } else if (!reg_val && kvm_riscv_vcpu_isa_disable_allowed(reg_num)) {
+               clear_bit(host_ext, vcpu->arch.isa);
+       } else {
+               return -EINVAL;
+       }
+
+       kvm_riscv_vcpu_fp_reset(vcpu);
+
+       return 0;
+}
+
+/*
+ * Collect the state of BITS_PER_LONG ISA extensions, starting at extension
+ * ID @reg_num * BITS_PER_LONG, as a bitmap.
+ *
+ * Bits are only OR-ed into *@reg_val, so the caller has to initialise it.
+ *
+ * Returns 0 on success or -ENOENT when @reg_num is beyond
+ * KVM_REG_RISCV_ISA_MULTI_REG_LAST.
+ */
+static int riscv_vcpu_get_isa_ext_multi(struct kvm_vcpu *vcpu,
+                                       unsigned long reg_num,
+                                       unsigned long *reg_val)
+{
+       unsigned long bit, ext_id, enabled;
+
+       if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+               return -ENOENT;
+
+       for (bit = 0; bit < BITS_PER_LONG; bit++) {
+               ext_id = reg_num * BITS_PER_LONG + bit;
+               if (ext_id >= KVM_RISCV_ISA_EXT_MAX)
+                       break;
+
+               /* A failed lookup leaves 'enabled' at 0 */
+               enabled = 0;
+               riscv_vcpu_get_isa_ext_single(vcpu, ext_id, &enabled);
+               if (!enabled)
+                       continue;
+
+               *reg_val |= KVM_REG_RISCV_ISA_MULTI_MASK(ext_id);
+       }
+
+       return 0;
+}
+
+/*
+ * Enable or disable (per @enable) every ISA extension whose bit is set in
+ * @reg_val, where bit N stands for extension ID
+ * @reg_num * BITS_PER_LONG + N.
+ *
+ * Failures of individual extensions are ignored.
+ *
+ * Returns 0 on success or -ENOENT when @reg_num is beyond
+ * KVM_REG_RISCV_ISA_MULTI_REG_LAST.
+ */
+static int riscv_vcpu_set_isa_ext_multi(struct kvm_vcpu *vcpu,
+                                       unsigned long reg_num,
+                                       unsigned long reg_val, bool enable)
+{
+       unsigned long bit, ext_id;
+
+       if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+               return -ENOENT;
+
+       for (bit = 0; bit < BITS_PER_LONG; bit++) {
+               if (!(reg_val & BIT(bit)))
+                       continue;
+
+               ext_id = reg_num * BITS_PER_LONG + bit;
+               if (ext_id >= KVM_RISCV_ISA_EXT_MAX)
+                       break;
+
+               riscv_vcpu_set_isa_ext_single(vcpu, ext_id, enable);
+       }
+
+       return 0;
+}
+
+/*
+ * Read one ISA_EXT register and copy it to the user buffer at reg->addr.
+ *
+ * The subtype bits of the register id select a single-extension register
+ * or one of the multi-extension bitmap registers.
+ *
+ * Returns 0 on success, -EINVAL on a size mismatch, -ENOENT for an unknown
+ * subtype, the helper's error code if it fails, and -EFAULT when the copy
+ * to user space fails.
+ */
+static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
+                                         const struct kvm_one_reg *reg)
+{
+       int rc;
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_ISA_EXT);
+       unsigned long reg_val, reg_subtype;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       /* Split the remaining id bits into subtype and register number */
+       reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       /* riscv_vcpu_get_isa_ext_multi() only ORs bits in, so start from 0 */
+       reg_val = 0;
+       switch (reg_subtype) {
+       case KVM_REG_RISCV_ISA_SINGLE:
+               rc = riscv_vcpu_get_isa_ext_single(vcpu, reg_num, &reg_val);
+               break;
+       case KVM_REG_RISCV_ISA_MULTI_EN:
+       case KVM_REG_RISCV_ISA_MULTI_DIS:
+               rc = riscv_vcpu_get_isa_ext_multi(vcpu, reg_num, &reg_val);
+               /* The "disabled" view is the complement of the "enabled" one */
+               if (!rc && reg_subtype == KVM_REG_RISCV_ISA_MULTI_DIS)
+                       reg_val = ~reg_val;
+               break;
+       default:
+               rc = -ENOENT;
+       }
+       if (rc)
+               return rc;
+
+       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       return 0;
+}
+
+/*
+ * Write one ISA_EXT register from the user buffer at reg->addr.
+ *
+ * The subtype bits of the register id select a single-extension register
+ * or one of the multi-extension enable/disable bitmap registers. The
+ * subtypes are matched with the KVM_REG_RISCV_ISA_* constants, the same
+ * ones kvm_riscv_vcpu_get_reg_isa_ext() uses.
+ *
+ * Returns 0 on success, -EINVAL on a size mismatch, -EFAULT when the copy
+ * from user space fails, -ENOENT for an unknown subtype, or the helper's
+ * error code.
+ */
+static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
+                                         const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_ISA_EXT);
+       unsigned long reg_val, reg_subtype;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       /* Split the remaining id bits into subtype and register number */
+       reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       switch (reg_subtype) {
+       case KVM_REG_RISCV_ISA_SINGLE:
+               return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
+       case KVM_REG_RISCV_ISA_MULTI_EN:
+               return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
+       case KVM_REG_RISCV_ISA_MULTI_DIS:
+               return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
+       default:
+               break;
+       }
+
+       return -ENOENT;
+}
+
+/*
+ * Emit the ids of the CONFIG registers available to @vcpu.
+ *
+ * When @uindices is NULL nothing is written and the registers are only
+ * counted.
+ *
+ * Returns the number of registers, or -EFAULT when a write to user space
+ * fails.
+ */
+static int copy_config_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       const u64 size = IS_ENABLED(CONFIG_32BIT) ?
+                        KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+       int count = 0;
+
+       for (int i = 0; i < sizeof(struct kvm_riscv_config)/sizeof(unsigned long);
+                i++) {
+               u64 reg;
+
+               /*
+                * Skip the block size registers whose extension is not
+                * available to the VCPU.
+                */
+               if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) &&
+                   !riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+                       continue;
+               if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) &&
+                   !riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+                       continue;
+
+               reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CONFIG | i;
+
+               if (uindices) {
+                       if (put_user(reg, uindices))
+                               return -EFAULT;
+                       uindices++;
+               }
+
+               count++;
+       }
+
+       return count;
+}
+
+/*
+ * Number of CONFIG registers available to @vcpu, obtained by running
+ * copy_config_reg_indices() in counting mode (NULL destination).
+ */
+static unsigned long num_config_regs(const struct kvm_vcpu *vcpu)
+{
+       return copy_config_reg_indices(vcpu, NULL);
+}
+
+/* Number of CORE registers: one per unsigned long in struct kvm_riscv_core */
+static inline unsigned long num_core_regs(void)
+{
+       return sizeof(struct kvm_riscv_core) / sizeof(unsigned long);
+}
+
+/*
+ * Emit the ids of all CORE registers.
+ *
+ * When @uindices is NULL nothing is written and the registers are only
+ * counted.
+ *
+ * Returns the number of registers, or -EFAULT when a write to user space
+ * fails.
+ */
+static int copy_core_reg_indices(u64 __user *uindices)
+{
+       const u64 size = IS_ENABLED(CONFIG_32BIT) ?
+                        KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+       int count = num_core_regs();
+
+       if (!uindices)
+               return count;
+
+       for (int i = 0; i < count; i++) {
+               u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CORE | i;
+
+               if (put_user(reg, &uindices[i]))
+                       return -EFAULT;
+       }
+
+       return count;
+}
+
+/*
+ * Number of CSR registers available to @vcpu: the general CSRs, plus the
+ * AIA CSRs when SSAIA is available to the VCPU.
+ */
+static inline unsigned long num_csr_regs(const struct kvm_vcpu *vcpu)
+{
+       unsigned long general = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long);
+       unsigned long aia = 0;
+
+       if (riscv_isa_extension_available(vcpu->arch.isa, SSAIA))
+               aia = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long);
+
+       return general + aia;
+}
+
+/*
+ * Emit the ids of the CSR registers available to @vcpu: the general CSRs
+ * first, then the AIA CSRs when SSAIA is available to the VCPU.
+ *
+ * When @uindices is NULL nothing is written and the registers are only
+ * counted.
+ *
+ * Returns the number of registers, or -EFAULT when a write to user space
+ * fails.
+ */
+static int copy_csr_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       const u64 size = IS_ENABLED(CONFIG_32BIT) ?
+                        KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+       int n_general = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long);
+       int n_aia = 0;
+       u64 reg;
+       int i;
+
+       /* general csr regs */
+       if (uindices) {
+               for (i = 0; i < n_general; i++) {
+                       reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR |
+                             KVM_REG_RISCV_CSR_GENERAL | i;
+                       if (put_user(reg, &uindices[i]))
+                               return -EFAULT;
+               }
+               uindices += n_general;
+       }
+
+       /* AIA csr regs */
+       if (riscv_isa_extension_available(vcpu->arch.isa, SSAIA)) {
+               n_aia = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long);
+
+               if (uindices) {
+                       for (i = 0; i < n_aia; i++) {
+                               reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR |
+                                     KVM_REG_RISCV_CSR_AIA | i;
+                               if (put_user(reg, &uindices[i]))
+                                       return -EFAULT;
+                       }
+               }
+       }
+
+       return n_general + n_aia;
+}
+
+/* Number of TIMER registers: one per u64 in struct kvm_riscv_timer */
+static inline unsigned long num_timer_regs(void)
+{
+       return sizeof(struct kvm_riscv_timer) / sizeof(u64);
+}
+
+/*
+ * Emit the ids of all TIMER registers; they are always 64 bits wide.
+ *
+ * When @uindices is NULL nothing is written and the registers are only
+ * counted.
+ *
+ * Returns the number of registers, or -EFAULT when a write to user space
+ * fails.
+ */
+static int copy_timer_reg_indices(u64 __user *uindices)
+{
+       int count = num_timer_regs();
+
+       if (!uindices)
+               return count;
+
+       for (int i = 0; i < count; i++) {
+               u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
+                         KVM_REG_RISCV_TIMER | i;
+
+               if (put_user(reg, &uindices[i]))
+                       return -EFAULT;
+       }
+
+       return count;
+}
+
+/*
+ * Number of FP_F registers available to @vcpu: one per u32 in the fp.f
+ * context, or 0 when the F extension is not available to the VCPU.
+ */
+static inline unsigned long num_fp_f_regs(const struct kvm_vcpu *vcpu)
+{
+       const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+       if (!riscv_isa_extension_available(vcpu->arch.isa, f))
+               return 0;
+
+       return sizeof(cntx->fp.f) / sizeof(u32);
+}
+
+/*
+ * Emit the ids of the FP_F registers available to @vcpu; they are always
+ * 32 bits wide.
+ *
+ * When @uindices is NULL nothing is written and the registers are only
+ * counted.
+ *
+ * Returns the number of registers, or -EFAULT when a write to user space
+ * fails.
+ */
+static int copy_fp_f_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       int count = num_fp_f_regs(vcpu);
+
+       if (!uindices)
+               return count;
+
+       for (int i = 0; i < count; i++) {
+               u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 |
+                         KVM_REG_RISCV_FP_F | i;
+
+               if (put_user(reg, &uindices[i]))
+                       return -EFAULT;
+       }
+
+       return count;
+}
+
+/*
+ * Number of FP_D registers available to @vcpu: one per u64 in fp.d.f plus
+ * one for fcsr, or 0 when the D extension is not available to the VCPU.
+ */
+static inline unsigned long num_fp_d_regs(const struct kvm_vcpu *vcpu)
+{
+       const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+       if (!riscv_isa_extension_available(vcpu->arch.isa, d))
+               return 0;
+
+       return sizeof(cntx->fp.d.f) / sizeof(u64) + 1;
+}
+
+/*
+ * Emit the ids of the FP_D registers available to @vcpu: the 64-bit
+ * fp.d.f registers followed by the 32-bit fcsr.
+ *
+ * When @uindices is NULL nothing is written and the registers are only
+ * counted.
+ *
+ * Returns the number of registers (0 when D is not available to the VCPU),
+ * or -EFAULT when a write to user space fails.
+ */
+static int copy_fp_d_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       int i;
+       int n = num_fp_d_regs(vcpu);
+       u64 reg;
+
+       /*
+        * Without D there is nothing to report. Bail out before the fcsr
+        * store below, which would otherwise write an id that is not
+        * included in the returned count.
+        */
+       if (!n)
+               return 0;
+
+       /* copy fp.d.f indices */
+       for (i = 0; i < n-1; i++) {
+               reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
+                     KVM_REG_RISCV_FP_D | i;
+
+               if (uindices) {
+                       if (put_user(reg, uindices))
+                               return -EFAULT;
+                       uindices++;
+               }
+       }
+
+       /* copy fp.d.fcsr indices */
+       reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | i;
+       if (uindices) {
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       return n;
+}
+
+/*
+ * Emit the ids of the ISA_EXT registers whose extension is set in
+ * vcpu->arch.isa.
+ *
+ * When @uindices is NULL nothing is written and the registers are only
+ * counted.
+ *
+ * Returns the number of registers, or -EFAULT when a write to user space
+ * fails.
+ */
+static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       unsigned int n = 0;
+       unsigned long isa_ext;
+
+       /*
+        * Bound the walk by the table size as well as KVM_RISCV_ISA_EXT_MAX,
+        * as riscv_vcpu_get_isa_ext_single() does, so kvm_isa_ext_arr[] is
+        * never read past its end.
+        */
+       for (unsigned long i = 0; i < KVM_RISCV_ISA_EXT_MAX &&
+                                 i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
+               u64 size = IS_ENABLED(CONFIG_32BIT) ?
+                          KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+               u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
+
+               isa_ext = kvm_isa_ext_arr[i];
+               if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext))
+                       continue;
+
+               if (uindices) {
+                       if (put_user(reg, uindices))
+                               return -EFAULT;
+                       uindices++;
+               }
+
+               n++;
+       }
+
+       return n;
+}
+
+/*
+ * Number of ISA_EXT registers available to @vcpu, obtained by running
+ * copy_isa_ext_reg_indices() in counting mode (NULL destination).
+ */
+static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
+{
+       return copy_isa_ext_reg_indices(vcpu, NULL);
+}
+
+/*
+ * Number of SBI_EXT registers. The multi registers are counted twice
+ * because each one has an enable and a disable variant.
+ */
+static inline unsigned long num_sbi_ext_regs(void)
+{
+       /*
+        * number of KVM_REG_RISCV_SBI_SINGLE +
+        * 2 x (number of KVM_REG_RISCV_SBI_MULTI)
+        */
+       return KVM_RISCV_SBI_EXT_MAX + 2*(KVM_REG_RISCV_SBI_MULTI_REG_LAST+1);
+}
+
+/*
+ * Emit the ids of all SBI_EXT registers: the single registers first, then
+ * an enable/disable pair for every multi register.
+ *
+ * When @uindices is NULL nothing is written and the registers are only
+ * counted.
+ *
+ * Returns num_sbi_ext_regs(), or -EFAULT when a write to user space fails.
+ */
+static int copy_sbi_ext_reg_indices(u64 __user *uindices)
+{
+       const u64 size = IS_ENABLED(CONFIG_32BIT) ?
+                        KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+       int n_single = KVM_RISCV_SBI_EXT_MAX;
+       int n_multi = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1;
+       u64 reg;
+       int i;
+
+       if (!uindices)
+               return num_sbi_ext_regs();
+
+       /* KVM_REG_RISCV_SBI_SINGLE */
+       for (i = 0; i < n_single; i++) {
+               reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
+                     KVM_REG_RISCV_SBI_SINGLE | i;
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       /* KVM_REG_RISCV_SBI_MULTI: enable id, then disable id */
+       for (i = 0; i < n_multi; i++) {
+               reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
+                     KVM_REG_RISCV_SBI_MULTI_EN | i;
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+
+               reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
+                     KVM_REG_RISCV_SBI_MULTI_DIS | i;
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       return num_sbi_ext_regs();
+}
+
+/*
+ * kvm_riscv_vcpu_num_regs - how many registers do we present via KVM_GET/SET_ONE_REG
+ *
+ * Adds up the per-class counts: config, core, CSR, timer, FP F, FP D, ISA
+ * extension and SBI extension registers.
+ *
+ * NOTE(review): kvm_riscv_vcpu_get_reg()/kvm_riscv_vcpu_set_reg() also
+ * dispatch KVM_REG_RISCV_VECTOR, which is not counted here - confirm that
+ * this is intended.
+ */
+unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu)
+{
+       unsigned long total = num_config_regs(vcpu);
+
+       total += num_core_regs();
+       total += num_csr_regs(vcpu);
+       total += num_timer_regs();
+       total += num_fp_f_regs(vcpu);
+       total += num_fp_d_regs(vcpu);
+       total += num_isa_ext_regs(vcpu);
+       total += num_sbi_ext_regs();
+
+       return total;
+}
+
+/*
+ * kvm_riscv_vcpu_copy_reg_indices - get indices of all registers.
+ *
+ * Calls the per-class copy helpers in a fixed order (config, core, CSR,
+ * timer, FP F, FP D, ISA extension, SBI extension) and advances the user
+ * pointer by the count each helper returns.
+ *
+ * Returns 0 on success, or the first negative value returned by a helper.
+ */
+int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
+                                   u64 __user *uindices)
+{
+       u64 __user *cursor = uindices;
+       int nr;
+
+       nr = copy_config_reg_indices(vcpu, cursor);
+       if (nr < 0)
+               return nr;
+       cursor += nr;
+
+       nr = copy_core_reg_indices(cursor);
+       if (nr < 0)
+               return nr;
+       cursor += nr;
+
+       nr = copy_csr_reg_indices(vcpu, cursor);
+       if (nr < 0)
+               return nr;
+       cursor += nr;
+
+       nr = copy_timer_reg_indices(cursor);
+       if (nr < 0)
+               return nr;
+       cursor += nr;
+
+       nr = copy_fp_f_reg_indices(vcpu, cursor);
+       if (nr < 0)
+               return nr;
+       cursor += nr;
+
+       nr = copy_fp_d_reg_indices(vcpu, cursor);
+       if (nr < 0)
+               return nr;
+       cursor += nr;
+
+       nr = copy_isa_ext_reg_indices(vcpu, cursor);
+       if (nr < 0)
+               return nr;
+       cursor += nr;
+
+       /* Last class: only the error matters, the count is not needed. */
+       nr = copy_sbi_ext_reg_indices(cursor);
+       if (nr < 0)
+               return nr;
+
+       return 0;
+}
+
+/*
+ * Dispatch a register write to the handler for the register class encoded
+ * in @reg->id. Returns the handler's result, or -ENOENT when the class is
+ * not one of the cases below.
+ */
+int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+                          const struct kvm_one_reg *reg)
+{
+       const u64 reg_type = reg->id & KVM_REG_RISCV_TYPE_MASK;
+
+       switch (reg_type) {
+       case KVM_REG_RISCV_CONFIG:
+               return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
+       case KVM_REG_RISCV_CORE:
+               return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
+       case KVM_REG_RISCV_CSR:
+               return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
+       case KVM_REG_RISCV_TIMER:
+               return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
+       case KVM_REG_RISCV_FP_F:
+               return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_F);
+       case KVM_REG_RISCV_FP_D:
+               return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_D);
+       case KVM_REG_RISCV_ISA_EXT:
+               return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
+       case KVM_REG_RISCV_SBI_EXT:
+               return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
+       case KVM_REG_RISCV_VECTOR:
+               return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
+       default:
+               return -ENOENT;
+       }
+}
+
+/*
+ * Dispatch a register read to the handler for the register class encoded
+ * in @reg->id. Returns the handler's result, or -ENOENT when the class is
+ * not one of the cases below.
+ */
+int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+                          const struct kvm_one_reg *reg)
+{
+       const u64 reg_type = reg->id & KVM_REG_RISCV_TYPE_MASK;
+
+       switch (reg_type) {
+       case KVM_REG_RISCV_CONFIG:
+               return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
+       case KVM_REG_RISCV_CORE:
+               return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
+       case KVM_REG_RISCV_CSR:
+               return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
+       case KVM_REG_RISCV_TIMER:
+               return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
+       case KVM_REG_RISCV_FP_F:
+               return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_F);
+       case KVM_REG_RISCV_FP_D:
+               return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_D);
+       case KVM_REG_RISCV_ISA_EXT:
+               return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
+       case KVM_REG_RISCV_SBI_EXT:
+               return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
+       case KVM_REG_RISCV_VECTOR:
+               return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
+       default:
+               return -ENOENT;
+       }
+}
index 7b46e04..9cd9709 100644 (file)
@@ -140,8 +140,10 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
        const struct kvm_riscv_sbi_extension_entry *sext = NULL;
        struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
 
-       if (reg_num >= KVM_RISCV_SBI_EXT_MAX ||
-           (reg_val != 1 && reg_val != 0))
+       if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
+               return -ENOENT;
+
+       if (reg_val != 1 && reg_val != 0)
                return -EINVAL;
 
        for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
@@ -175,7 +177,7 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
        struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
 
        if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
-               return -EINVAL;
+               return -ENOENT;
 
        for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
                if (sbi_ext[i].ext_idx == reg_num) {
@@ -206,7 +208,7 @@ static int riscv_vcpu_set_sbi_ext_multi(struct kvm_vcpu *vcpu,
        unsigned long i, ext_id;
 
        if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
-               return -EINVAL;
+               return -ENOENT;
 
        for_each_set_bit(i, &reg_val, BITS_PER_LONG) {
                ext_id = i + reg_num * BITS_PER_LONG;
@@ -226,7 +228,7 @@ static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu,
        unsigned long i, ext_id, ext_val;
 
        if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
-               return -EINVAL;
+               return -ENOENT;
 
        for (i = 0; i < BITS_PER_LONG; i++) {
                ext_id = i + reg_num * BITS_PER_LONG;
@@ -272,7 +274,7 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
        case KVM_REG_RISCV_SBI_MULTI_DIS:
                return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, false);
        default:
-               return -EINVAL;
+               return -ENOENT;
        }
 
        return 0;
@@ -307,7 +309,7 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
                        reg_val = ~reg_val;
                break;
        default:
-               rc = -EINVAL;
+               rc = -ENOENT;
        }
        if (rc)
                return rc;
index 3ac2ff6..75486b2 100644 (file)
@@ -170,7 +170,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
        if (KVM_REG_SIZE(reg->id) != sizeof(u64))
                return -EINVAL;
        if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
-               return -EINVAL;
+               return -ENOENT;
 
        switch (reg_num) {
        case KVM_REG_RISCV_TIMER_REG(frequency):
@@ -187,7 +187,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
                                          KVM_RISCV_TIMER_STATE_OFF;
                break;
        default:
-               return -EINVAL;
+               return -ENOENT;
        }
 
        if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
@@ -211,14 +211,15 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
        if (KVM_REG_SIZE(reg->id) != sizeof(u64))
                return -EINVAL;
        if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
-               return -EINVAL;
+               return -ENOENT;
 
        if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
                return -EFAULT;
 
        switch (reg_num) {
        case KVM_REG_RISCV_TIMER_REG(frequency):
-               ret = -EOPNOTSUPP;
+               if (reg_val != riscv_timebase)
+                       return -EINVAL;
                break;
        case KVM_REG_RISCV_TIMER_REG(time):
                gt->time_delta = reg_val - get_cycles64();
@@ -233,7 +234,7 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
                        ret = kvm_riscv_vcpu_timer_cancel(t);
                break;
        default:
-               ret = -EINVAL;
+               ret = -ENOENT;
                break;
        }
 
index edd2eec..b430cbb 100644 (file)
@@ -91,95 +91,93 @@ void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
 }
 #endif
 
-static void *kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
-                                     unsigned long reg_num,
-                                     size_t reg_size)
+static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
+                                   unsigned long reg_num,
+                                   size_t reg_size,
+                                   void **reg_addr)
 {
        struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
-       void *reg_val;
        size_t vlenb = riscv_v_vsize / 32;
 
        if (reg_num < KVM_REG_RISCV_VECTOR_REG(0)) {
                if (reg_size != sizeof(unsigned long))
-                       return NULL;
+                       return -EINVAL;
                switch (reg_num) {
                case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
-                       reg_val = &cntx->vector.vstart;
+                       *reg_addr = &cntx->vector.vstart;
                        break;
                case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
-                       reg_val = &cntx->vector.vl;
+                       *reg_addr = &cntx->vector.vl;
                        break;
                case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
-                       reg_val = &cntx->vector.vtype;
+                       *reg_addr = &cntx->vector.vtype;
                        break;
                case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
-                       reg_val = &cntx->vector.vcsr;
+                       *reg_addr = &cntx->vector.vcsr;
                        break;
                case KVM_REG_RISCV_VECTOR_CSR_REG(datap):
                default:
-                       return NULL;
+                       return -ENOENT;
                }
        } else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) {
                if (reg_size != vlenb)
-                       return NULL;
-               reg_val = cntx->vector.datap
-                         + (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
+                       return -EINVAL;
+               *reg_addr = cntx->vector.datap +
+                           (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
        } else {
-               return NULL;
+               return -ENOENT;
        }
 
-       return reg_val;
+       return 0;
 }
 
 int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg,
-                                 unsigned long rtype)
+                                 const struct kvm_one_reg *reg)
 {
        unsigned long *isa = vcpu->arch.isa;
        unsigned long __user *uaddr =
                        (unsigned long __user *)(unsigned long)reg->addr;
        unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
                                            KVM_REG_SIZE_MASK |
-                                           rtype);
-       void *reg_val = NULL;
+                                           KVM_REG_RISCV_VECTOR);
        size_t reg_size = KVM_REG_SIZE(reg->id);
+       void *reg_addr;
+       int rc;
 
-       if (rtype == KVM_REG_RISCV_VECTOR &&
-           riscv_isa_extension_available(isa, v)) {
-               reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
-       }
+       if (!riscv_isa_extension_available(isa, v))
+               return -ENOENT;
 
-       if (!reg_val)
-               return -EINVAL;
+       rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
+       if (rc)
+               return rc;
 
-       if (copy_to_user(uaddr, reg_val, reg_size))
+       if (copy_to_user(uaddr, reg_addr, reg_size))
                return -EFAULT;
 
        return 0;
 }
 
 int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg,
-                                 unsigned long rtype)
+                                 const struct kvm_one_reg *reg)
 {
        unsigned long *isa = vcpu->arch.isa;
        unsigned long __user *uaddr =
                        (unsigned long __user *)(unsigned long)reg->addr;
        unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
                                            KVM_REG_SIZE_MASK |
-                                           rtype);
-       void *reg_val = NULL;
+                                           KVM_REG_RISCV_VECTOR);
        size_t reg_size = KVM_REG_SIZE(reg->id);
+       void *reg_addr;
+       int rc;
 
-       if (rtype == KVM_REG_RISCV_VECTOR &&
-           riscv_isa_extension_available(isa, v)) {
-               reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
-       }
+       if (!riscv_isa_extension_available(isa, v))
+               return -ENOENT;
 
-       if (!reg_val)
-               return -EINVAL;
+       rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
+       if (rc)
+               return rc;
 
-       if (copy_from_user(reg_val, uaddr, reg_size))
+       if (copy_from_user(reg_addr, uaddr, reg_size))
                return -EFAULT;
 
        return 0;
index 8753cb0..7b75217 100644 (file)
@@ -19,7 +19,6 @@ struct parmarea parmarea __section(".parmarea") = {
 };
 
 char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
-int __bootdata(noexec_disabled);
 
 unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
 struct ipl_parameter_block __bootdata_preserved(ipl_block);
@@ -290,12 +289,6 @@ void parse_boot_command_line(void)
                                zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG;
                }
 
-               if (!strcmp(param, "noexec")) {
-                       rc = kstrtobool(val, &enabled);
-                       if (!rc && !enabled)
-                               noexec_disabled = 1;
-               }
-
                if (!strcmp(param, "facilities") && val)
                        modify_fac_list(val);
 
index b9681cb..d3e48bd 100644 (file)
@@ -53,10 +53,8 @@ static void detect_facilities(void)
        }
        if (test_facility(78))
                machine.has_edat2 = 1;
-       if (!noexec_disabled && test_facility(130)) {
+       if (test_facility(130))
                machine.has_nx = 1;
-               __ctl_set_bit(0, 20);
-       }
 }
 
 static void setup_lpp(void)
index c67f59d..01257ce 100644 (file)
@@ -287,7 +287,9 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
                        if (kasan_pte_populate_zero_shadow(pte, mode))
                                continue;
                        entry = __pte(_pa(addr, PAGE_SIZE, mode));
-                       entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+                       entry = set_pte_bit(entry, PAGE_KERNEL);
+                       if (!machine.has_nx)
+                               entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
                        set_pte(pte, entry);
                        pages++;
                }
@@ -311,7 +313,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
                                continue;
                        if (can_large_pmd(pmd, addr, next)) {
                                entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
-                               entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
+                               entry = set_pmd_bit(entry, SEGMENT_KERNEL);
+                               if (!machine.has_nx)
+                                       entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
                                set_pmd(pmd, entry);
                                pages++;
                                continue;
@@ -342,7 +346,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
                                continue;
                        if (can_large_pud(pud, addr, next)) {
                                entry = __pud(_pa(addr, _REGION3_SIZE, mode));
-                               entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
+                               entry = set_pud_bit(entry, REGION3_KERNEL);
+                               if (!machine.has_nx)
+                                       entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
                                set_pud(pud, entry);
                                pages++;
                                continue;
index 39227b4..eb7f84f 100644 (file)
@@ -1 +1,2 @@
+# Help: Enable BTF debug info
 CONFIG_DEBUG_INFO_BTF=y
index 700a8b2..84c2b55 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Enable KASan for debugging
 CONFIG_KASAN=y
 CONFIG_KASAN_INLINE=y
 CONFIG_KASAN_VMALLOC=y
index e82e562..c4c28c2 100644 (file)
@@ -18,7 +18,6 @@ struct airq_struct {
        struct hlist_node list;         /* Handler queueing. */
        void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
        u8 *lsi_ptr;                    /* Local-Summary-Indicator pointer */
-       u8 lsi_mask;                    /* Local-Summary-Indicator mask */
        u8 isc;                         /* Interrupt-subclass */
        u8 flags;
 };
index c260adb..7fe3e31 100644 (file)
@@ -9,6 +9,6 @@
  * to DMA. It _is_ used for the s390 memory zone split at 2GB caused
  * by the 31 bit heritage.
  */
-#define MAX_DMA_ADDRESS         0x80000000
+#define MAX_DMA_ADDRESS                __va(0x80000000)
 
 #endif /* _ASM_S390_DMA_H */
index 91bfecb..427f952 100644 (file)
@@ -817,6 +817,8 @@ struct kvm_s390_cpu_model {
        __u64 *fac_list;
        u64 cpuid;
        unsigned short ibc;
+       /* subset of available UV-features for pv-guests enabled by user space */
+       struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
 };
 
 typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
index 3fecaa4..0486e6e 100644 (file)
@@ -23,7 +23,7 @@
  */
 #define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var
 
-extern unsigned long __samode31, __eamode31;
-extern unsigned long __stext_amode31, __etext_amode31;
+extern char *__samode31, *__eamode31;
+extern char *__stext_amode31, *__etext_amode31;
 
 #endif
index 7a3eefd..06fbabe 100644 (file)
@@ -24,43 +24,41 @@ enum {
 #define SET_MEMORY_INV BIT(_SET_MEMORY_INV_BIT)
 #define SET_MEMORY_DEF BIT(_SET_MEMORY_DEF_BIT)
 
-int __set_memory(unsigned long addr, int numpages, unsigned long flags);
-
-static inline int set_memory_ro(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_RO);
-}
-
-static inline int set_memory_rw(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_RW);
-}
-
-static inline int set_memory_nx(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_NX);
-}
-
-static inline int set_memory_x(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_X);
-}
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags);
 
 #define set_memory_rox set_memory_rox
-static inline int set_memory_rox(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_RO | SET_MEMORY_X);
-}
 
-static inline int set_memory_rwnx(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_RW | SET_MEMORY_NX);
+/*
+ * Generate two variants of each set_memory() function:
+ *
+ * set_memory_yy(unsigned long addr, int numpages);
+ * __set_memory_yy(void *start, void *end);
+ *
+ * The second variant exists for convenience, to avoid the usual
+ * (unsigned long) casts; unlike the first variant it can also be used
+ * for areas larger than 8TB, which may happen at memory initialization.
+ */
+#define __SET_MEMORY_FUNC(fname, flags)                                        \
+static inline int fname(unsigned long addr, int numpages)              \
+{                                                                      \
+       return __set_memory(addr, numpages, (flags));                   \
+}                                                                      \
+                                                                       \
+static inline int __##fname(void *start, void *end)                    \
+{                                                                      \
+       unsigned long numpages;                                         \
+                                                                       \
+       numpages = (end - start) >> PAGE_SHIFT;                         \
+       return __set_memory((unsigned long)start, numpages, (flags));   \
 }
 
-static inline int set_memory_4k(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_4K);
-}
+__SET_MEMORY_FUNC(set_memory_ro, SET_MEMORY_RO)
+__SET_MEMORY_FUNC(set_memory_rw, SET_MEMORY_RW)
+__SET_MEMORY_FUNC(set_memory_nx, SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_x, SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rox, SET_MEMORY_RO | SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rwnx, SET_MEMORY_RW | SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K)
 
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
index b30fe91..25cadc2 100644 (file)
@@ -72,7 +72,6 @@ extern unsigned int zlib_dfltcc_support;
 #define ZLIB_DFLTCC_INFLATE_ONLY       3
 #define ZLIB_DFLTCC_FULL_DEBUG         4
 
-extern int noexec_disabled;
 extern unsigned long ident_map_size;
 extern unsigned long max_mappable;
 
index d2cd42b..0e7bd38 100644 (file)
@@ -99,6 +99,8 @@ enum uv_cmds_inst {
 enum uv_feat_ind {
        BIT_UV_FEAT_MISC = 0,
        BIT_UV_FEAT_AIV = 1,
+       BIT_UV_FEAT_AP = 4,
+       BIT_UV_FEAT_AP_INTR = 5,
 };
 
 struct uv_cb_header {
@@ -159,7 +161,15 @@ struct uv_cb_cgc {
        u64 guest_handle;
        u64 conf_base_stor_origin;
        u64 conf_virt_stor_origin;
-       u64 reserved30;
+       u8  reserved30[6];
+       union {
+               struct {
+                       u16 : 14;
+                       u16 ap_instr_intr : 1;
+                       u16 ap_allow_instr : 1;
+               };
+               u16 raw;
+       } flags;
        u64 guest_stor_origin;
        u64 guest_stor_len;
        u64 guest_sca;
@@ -397,6 +407,13 @@ struct uv_info {
 
 extern struct uv_info uv_info;
 
+/*
+ * Test @feature_bit in uv_info.uv_feature_indications. Bit numbers beyond
+ * the width of that field are reported as false.
+ */
+static inline bool uv_has_feature(u8 feature_bit)
+{
+       const size_t nr_bits = sizeof(uv_info.uv_feature_indications) * 8;
+
+       return feature_bit < nr_bits &&
+              test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
+}
+
 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 extern int prot_virt_guest;
 
index a73cf01..abe926d 100644 (file)
@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
        __u8 reserved[1728];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST        6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST  7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS        64
+struct kvm_s390_vm_cpu_uv_feat {
+       union {
+               struct {
+                       __u64 : 4;
+                       __u64 ap : 1;           /* bit 4 */
+                       __u64 ap_intr : 1;      /* bit 5 */
+                       __u64 : 58;
+               };
+               __u64 feat;
+       };
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW       0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW       1
index 2dd5976..442ce04 100644 (file)
@@ -44,7 +44,6 @@ early_param(#param, ignore_decompressor_param_##param)
 decompressor_handled_param(mem);
 decompressor_handled_param(vmalloc);
 decompressor_handled_param(dfltcc);
-decompressor_handled_param(noexec);
 decompressor_handled_param(facilities);
 decompressor_handled_param(nokaslr);
 #if IS_ENABLED(CONFIG_KVM)
@@ -233,10 +232,8 @@ static __init void detect_machine_facilities(void)
                S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
                __ctl_set_bit(0, 17);
        }
-       if (test_facility(130) && !noexec_disabled) {
+       if (test_facility(130))
                S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
-               __ctl_set_bit(0, 20);
-       }
        if (test_facility(133))
                S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
        if (test_facility(139) && (tod_clock_base.tod >> 63)) {
index 12a2bd4..ce65fc0 100644 (file)
@@ -216,8 +216,8 @@ void arch_crash_save_vmcoreinfo(void)
        VMCOREINFO_SYMBOL(lowcore_ptr);
        VMCOREINFO_SYMBOL(high_memory);
        VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
-       vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
-       vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
+       vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
+       vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
        vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
        abs_lc = get_abs_lowcore();
        abs_lc->vmcore_info = paddr_vmcoreinfo_note();
index c744104..de6ad0f 100644 (file)
@@ -97,10 +97,10 @@ EXPORT_SYMBOL(console_irq);
  * relocated above 2 GB, because it has to use 31 bit addresses.
  * Such code and data is part of the .amode31 section.
  */
-unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
-unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
-unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
-unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
+char __amode31_ref *__samode31 = _samode31;
+char __amode31_ref *__eamode31 = _eamode31;
+char __amode31_ref *__stext_amode31 = _stext_amode31;
+char __amode31_ref *__etext_amode31 = _etext_amode31;
 struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
 struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
 
@@ -145,7 +145,6 @@ static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
 static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
 static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
 
-int __bootdata(noexec_disabled);
 unsigned long __bootdata_preserved(max_mappable);
 unsigned long __bootdata(ident_map_size);
 struct physmem_info __bootdata(physmem_info);
@@ -771,15 +770,15 @@ static void __init setup_memory(void)
 static void __init relocate_amode31_section(void)
 {
        unsigned long amode31_size = __eamode31 - __samode31;
-       long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31;
-       long *ptr;
+       long amode31_offset, *ptr;
 
+       amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31;
        pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
 
        /* Move original AMODE31 section to the new one */
-       memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size);
+       memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
        /* Zero out the old AMODE31 section to catch invalid accesses within it */
-       memset((void *)__samode31, 0, amode31_size);
+       memset(__samode31, 0, amode31_size);
 
        /* Update all AMODE31 region references */
        for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
index b771f1b..fc07bc3 100644 (file)
@@ -258,7 +258,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
         * shared page from a different protected VM will automatically also
         * transfer its ownership.
         */
-       if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
+       if (uv_has_feature(BIT_UV_FEAT_MISC))
                return false;
        if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
                return false;
index 341abaf..b163520 100644 (file)
@@ -228,6 +228,21 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
 
 #define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
 
+/*
+ * Decide whether handle_prog() should call kvm_s390_handle_per_event() for
+ * @vcpu.
+ */
+static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
+{
+       if (!guestdbg_enabled(vcpu))
+               return false;
+       if (!per_event(vcpu))
+               return false;
+       /*
+        * When single-stepping and something other than a plain PER event is
+        * indicated, __vcpu_run() will exit after delivering the concurrently
+        * indicated condition, so the event is not handled here.
+        */
+       return !guestdbg_sstep_enabled(vcpu) ||
+              vcpu->arch.sie_block->iprcc == PGM_PER;
+}
+
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
        psw_t psw;
@@ -242,7 +257,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
        if (kvm_s390_pv_cpu_is_protected(vcpu))
                return -EOPNOTSUPP;
 
-       if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+       if (should_handle_per_event(vcpu)) {
                rc = kvm_s390_handle_per_event(vcpu);
                if (rc)
                        return rc;
@@ -571,6 +586,19 @@ static int handle_pv_notification(struct kvm_vcpu *vcpu)
        return handle_instruction(vcpu);
 }
 
+/*
+ * Decide whether kvm_handle_sie_intercept() should run the PER ifetch
+ * handling after an intercept handler returned @rc.
+ */
+static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
+{
+       /* Process PER, also if the instruction is processed in user space. */
+       const bool rc_allows_per = !rc || rc == -EOPNOTSUPP;
+
+       if (!(vcpu->arch.sie_block->icptstatus & 0x02) || !rc_allows_per)
+               return false;
+
+       /* __vcpu_run() will exit after delivering the interrupt. */
+       return !(guestdbg_sstep_enabled(vcpu) &&
+                vcpu->arch.local_int.pending_irqs);
+}
+
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
        int rc, per_rc = 0;
@@ -605,8 +633,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
                rc = handle_partial_execution(vcpu);
                break;
        case ICPT_KSS:
-               rc = kvm_s390_skey_check_enable(vcpu);
-               break;
+               /* Instruction will be redriven, skip the PER check. */
+               return kvm_s390_skey_check_enable(vcpu);
        case ICPT_MCHKREQ:
        case ICPT_INT_ENABLE:
                /*
@@ -633,9 +661,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
                return -EOPNOTSUPP;
        }
 
-       /* process PER, also if the instruction is processed in user space */
-       if (vcpu->arch.sie_block->icptstatus & 0x02 &&
-           (!rc || rc == -EOPNOTSUPP))
+       if (should_handle_per_ifetch(vcpu, rc))
                per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
        return per_rc ? per_rc : rc;
 }
index 9bd0a87..c1b47d6 100644 (file)
@@ -1392,6 +1392,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        int rc = 0;
+       bool delivered = false;
        unsigned long irq_type;
        unsigned long irqs;
 
@@ -1465,6 +1466,19 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
                        WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
                        clear_bit(irq_type, &li->pending_irqs);
                }
+               delivered |= !rc;
+       }
+
+       /*
+        * We delivered at least one interrupt and modified the PC. Force a
+        * singlestep event now.
+        */
+       if (delivered && guestdbg_sstep_enabled(vcpu)) {
+               struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+
+               debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
+               debug_exit->type = KVM_SINGLESTEP;
+               vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
        }
 
        set_intercept_indicators(vcpu);
@@ -3398,7 +3412,6 @@ static void gib_alert_irq_handler(struct airq_struct *airq,
 
 static struct airq_struct gib_alert_irq = {
        .handler = gib_alert_irq_handler,
-       .lsi_ptr = &gib_alert_irq.lsi_mask,
 };
 
 void kvm_s390_gib_destroy(void)
@@ -3438,6 +3451,8 @@ int __init kvm_s390_gib_init(u8 nisc)
                rc = -EIO;
                goto out_free_gib;
        }
+       /* adapter interrupts used for AP (applicable here) don't use the LSI */
+       *gib_alert_irq.lsi_ptr = 0xff;
 
        gib->nisc = nisc;
        gib_origin = virt_to_phys(gib);
index d1e768b..b3f17e0 100644 (file)
@@ -1531,6 +1531,39 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
        return 0;
 }
 
+#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK     \
+(                                              \
+       ((struct kvm_s390_vm_cpu_uv_feat){      \
+               .ap = 1,                        \
+               .ap_intr = 1,                   \
+       })                                      \
+       .feat                                   \
+)
+
+static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
+       unsigned long data, filter;
+
+       filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+       if (get_user(data, &ptr->feat))
+               return -EFAULT;
+       if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+       if (kvm->created_vcpus) {
+               mutex_unlock(&kvm->lock);
+               return -EBUSY;
+       }
+       kvm->arch.model.uv_feat_guest.feat = data;
+       mutex_unlock(&kvm->lock);
+
+       VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
+
+       return 0;
+}
+
 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
        int ret = -ENXIO;
@@ -1545,6 +1578,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
+       case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+               ret = kvm_s390_set_uv_feat(kvm, attr);
+               break;
        }
        return ret;
 }
@@ -1777,6 +1813,33 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
        return 0;
 }
 
+static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+       unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
+
+       if (put_user(feat, &dst->feat))
+               return -EFAULT;
+       VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+       return 0;
+}
+
+static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+       unsigned long feat;
+
+       BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
+
+       feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+       if (put_user(feat, &dst->feat))
+               return -EFAULT;
+       VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+       return 0;
+}
+
 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
        int ret = -ENXIO;
@@ -1800,6 +1863,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
+       case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+               ret = kvm_s390_get_processor_uv_feat(kvm, attr);
+               break;
+       case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+               ret = kvm_s390_get_machine_uv_feat(kvm, attr);
+               break;
        }
        return ret;
 }
@@ -1952,6 +2021,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+               case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+               case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
                        ret = 0;
                        break;
                default:
@@ -2406,7 +2477,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
        struct kvm_vcpu *vcpu;
 
        /* Disable the GISA if the ultravisor does not support AIV. */
-       if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
+       if (!uv_has_feature(BIT_UV_FEAT_AIV))
                kvm_s390_gisa_disable(kvm);
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3296,6 +3367,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
 
+       kvm->arch.model.uv_feat_guest.feat = 0;
+
        kvm_s390_crypto_init(kvm);
 
        if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
@@ -4611,7 +4684,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                rc = kvm_s390_deliver_pending_interrupts(vcpu);
-               if (rc)
+               if (rc || guestdbg_exit_pending(vcpu))
                        return rc;
        }
 
@@ -4738,7 +4811,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
        do {
                rc = vcpu_pre_run(vcpu);
-               if (rc)
+               if (rc || guestdbg_exit_pending(vcpu))
                        break;
 
                kvm_vcpu_srcu_read_unlock(vcpu);
@@ -5383,6 +5456,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 {
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
+       int rc;
 
        switch (ioctl) {
        case KVM_S390_IRQ: {
@@ -5390,7 +5464,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 
                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
                        return -EFAULT;
-               return kvm_s390_inject_vcpu(vcpu, &s390irq);
+               rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+               break;
        }
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
@@ -5400,10 +5475,25 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
                        return -EFAULT;
                if (s390int_to_s390irq(&s390int, &s390irq))
                        return -EINVAL;
-               return kvm_s390_inject_vcpu(vcpu, &s390irq);
+               rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+               break;
        }
+       default:
+               rc = -ENOIOCTLCMD;
+               break;
        }
-       return -ENOIOCTLCMD;
+
+       /*
+        * To simplify single stepping of userspace-emulated instructions,
+        * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
+        * should_handle_per_ifetch()). However, if userspace emulation injects
+        * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
+        * after (and not before) the interrupt delivery.
+        */
+       if (!rc)
+               vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+
+       return rc;
 }
 
 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
index 8d3f39a..75e81ba 100644 (file)
@@ -285,7 +285,8 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
        WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
        KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
                     uvcb.header.rc, uvcb.header.rrc);
-       WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+       WARN_ONCE(cc && uvcb.header.rc != 0x104,
+                 "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
                  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
        /* Intended memory leak on "impossible" error */
        if (!cc)
@@ -575,12 +576,14 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
        uvcb.conf_base_stor_origin =
                virt_to_phys((void *)kvm->arch.pv.stor_base);
        uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+       uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
+       uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
 
        cc = uv_call_sched(0, (u64)&uvcb);
        *rc = uvcb.header.rc;
        *rrc = uvcb.header.rrc;
-       KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
-                    uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+       KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
+                    uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
 
        /* Outputs */
        kvm->arch.pv.handle = uvcb.guest_handle;
index afa5db7..b516669 100644 (file)
@@ -290,8 +290,8 @@ static int pt_dump_init(void)
        max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
        max_addr = 1UL << (max_addr * 11 + 31);
        address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
-       address_markers[AMODE31_START_NR].start_address = __samode31;
-       address_markers[AMODE31_END_NR].start_address = __eamode31;
+       address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31;
+       address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31;
        address_markers[MODULES_NR].start_address = MODULES_VADDR;
        address_markers[MODULES_END_NR].start_address = MODULES_END;
        address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
index 099c482..b678295 100644 (file)
@@ -598,7 +598,7 @@ void do_secure_storage_access(struct pt_regs *regs)
         * reliable without the misc UV feature so we need to check
         * for that as well.
         */
-       if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
+       if (uv_has_feature(BIT_UV_FEAT_MISC) &&
            !test_bit_inv(61, &regs->int_parm_long)) {
                /*
                 * When this happens, userspace did something that it
index 8d94e29..8b94d22 100644 (file)
@@ -98,7 +98,7 @@ void __init paging_init(void)
        sparse_init();
        zone_dma_bits = 31;
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-       max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
+       max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS);
        max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
        free_area_init(max_zone_pfns);
 }
@@ -107,7 +107,7 @@ void mark_rodata_ro(void)
 {
        unsigned long size = __end_ro_after_init - __start_ro_after_init;
 
-       set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
+       __set_memory_ro(__start_ro_after_init, __end_ro_after_init);
        pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
        debug_checkwx();
 }
index e5ec762..b87e96c 100644 (file)
@@ -373,7 +373,7 @@ static int change_page_attr_alias(unsigned long addr, unsigned long end,
        return rc;
 }
 
-int __set_memory(unsigned long addr, int numpages, unsigned long flags)
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
 {
        unsigned long end;
        int rc;
index e44243b..6957d2e 100644 (file)
@@ -5,7 +5,6 @@
 
 #include <linux/memory_hotplug.h>
 #include <linux/memblock.h>
-#include <linux/kasan.h>
 #include <linux/pfn.h>
 #include <linux/mm.h>
 #include <linux/init.h>
@@ -291,14 +290,9 @@ out:
 
 static void try_free_pmd_table(pud_t *pud, unsigned long start)
 {
-       const unsigned long end = start + PUD_SIZE;
        pmd_t *pmd;
        int i;
 
-       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-       if (end > VMALLOC_START)
-               return;
-
        pmd = pmd_offset(pud, start);
        for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
                if (!pmd_none(*pmd))
@@ -363,14 +357,9 @@ out:
 
 static void try_free_pud_table(p4d_t *p4d, unsigned long start)
 {
-       const unsigned long end = start + P4D_SIZE;
        pud_t *pud;
        int i;
 
-       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-       if (end > VMALLOC_START)
-               return;
-
        pud = pud_offset(p4d, start);
        for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
                if (!pud_none(*pud))
@@ -413,14 +402,9 @@ out:
 
 static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
 {
-       const unsigned long end = start + PGDIR_SIZE;
        p4d_t *p4d;
        int i;
 
-       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-       if (end > VMALLOC_START)
-               return;
-
        p4d = p4d_offset(pgd, start);
        for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
                if (!p4d_none(*p4d))
@@ -440,6 +424,9 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
 
        if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
                return -EINVAL;
+       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+       if (WARN_ON_ONCE(end > VMALLOC_START))
+               return -EINVAL;
        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);
                pgd = pgd_offset_k(addr);
@@ -650,122 +637,29 @@ void vmem_unmap_4k_page(unsigned long addr)
        mutex_unlock(&vmem_mutex);
 }
 
-static int __init memblock_region_cmp(const void *a, const void *b)
-{
-       const struct memblock_region *r1 = a;
-       const struct memblock_region *r2 = b;
-
-       if (r1->base < r2->base)
-               return -1;
-       if (r1->base > r2->base)
-               return 1;
-       return 0;
-}
-
-static void __init memblock_region_swap(void *a, void *b, int size)
-{
-       swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
-}
-
-#ifdef CONFIG_KASAN
-#define __sha(x)       ((unsigned long)kasan_mem_to_shadow((void *)x))
-
-static inline int set_memory_kasan(unsigned long start, unsigned long end)
-{
-       start = PAGE_ALIGN_DOWN(__sha(start));
-       end = PAGE_ALIGN(__sha(end));
-       return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT);
-}
-#endif
-
-/*
- * map whole physical memory to virtual memory (identity mapping)
- * we reserve enough space in the vmalloc area for vmemmap to hotplug
- * additional memory segments.
- */
 void __init vmem_map_init(void)
 {
-       struct memblock_region memory_rwx_regions[] = {
-               {
-                       .base   = 0,
-                       .size   = sizeof(struct lowcore),
-                       .flags  = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-                       .nid    = NUMA_NO_NODE,
-#endif
-               },
-               {
-                       .base   = __pa(_stext),
-                       .size   = _etext - _stext,
-                       .flags  = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-                       .nid    = NUMA_NO_NODE,
-#endif
-               },
-               {
-                       .base   = __pa(_sinittext),
-                       .size   = _einittext - _sinittext,
-                       .flags  = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-                       .nid    = NUMA_NO_NODE,
-#endif
-               },
-               {
-                       .base   = __stext_amode31,
-                       .size   = __etext_amode31 - __stext_amode31,
-                       .flags  = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-                       .nid    = NUMA_NO_NODE,
-#endif
-               },
-       };
-       struct memblock_type memory_rwx = {
-               .regions        = memory_rwx_regions,
-               .cnt            = ARRAY_SIZE(memory_rwx_regions),
-               .max            = ARRAY_SIZE(memory_rwx_regions),
-       };
-       phys_addr_t base, end;
-       u64 i;
-
+       __set_memory_rox(_stext, _etext);
+       __set_memory_ro(_etext, __end_rodata);
+       __set_memory_rox(_sinittext, _einittext);
+       __set_memory_rox(__stext_amode31, __etext_amode31);
        /*
-        * Set RW+NX attribute on all memory, except regions enumerated with
-        * memory_rwx exclude type. These regions need different attributes,
-        * which are enforced afterwards.
-        *
-        * __for_each_mem_range() iterate and exclude types should be sorted.
-        * The relative location of _stext and _sinittext is hardcoded in the
-        * linker script. However a location of __stext_amode31 and the kernel
-        * image itself are chosen dynamically. Thus, sort the exclude type.
+        * If the BEAR-enhancement facility is not installed the first
+        * prefix page is used to return to the previous context with
+        * an LPSWE instruction and therefore must be executable.
         */
-       sort(&memory_rwx_regions,
-            ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
-            memblock_region_cmp, memblock_region_swap);
-       __for_each_mem_range(i, &memblock.memory, &memory_rwx,
-                            NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
-               set_memory_rwnx((unsigned long)__va(base),
-                               (end - base) >> PAGE_SHIFT);
+       if (!static_key_enabled(&cpu_has_bear))
+               set_memory_x(0, 1);
+       if (debug_pagealloc_enabled()) {
+               /*
+                * Use RELOC_HIDE() as long as __va(0) translates to NULL,
+                * since performing pointer arithmetic on a NULL pointer
+                * has undefined behavior and generates compiler warnings.
+                */
+               __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
        }
-
-#ifdef CONFIG_KASAN
-       for_each_mem_range(i, &base, &end)
-               set_memory_kasan(base, end);
-#endif
-       set_memory_rox((unsigned long)_stext,
-                      (unsigned long)(_etext - _stext) >> PAGE_SHIFT);
-       set_memory_ro((unsigned long)_etext,
-                     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT);
-       set_memory_rox((unsigned long)_sinittext,
-                      (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT);
-       set_memory_rox(__stext_amode31,
-                      (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT);
-
-       /* lowcore must be executable for LPSWE */
-       if (static_key_enabled(&cpu_has_bear))
-               set_memory_nx(0, 1);
-       set_memory_nx(PAGE_SIZE, 1);
-       if (debug_pagealloc_enabled())
-               set_memory_4k(0, ident_map_size >> PAGE_SHIFT);
-
+       if (MACHINE_HAS_NX)
+               ctl_set_bit(0, 20);
        pr_info("Write protected kernel read-only data: %luk\n",
                (unsigned long)(__end_rodata - _stext) >> 10);
 }
index 5e9371f..de2fb12 100644 (file)
@@ -2088,6 +2088,7 @@ struct bpf_tramp_jit {
                                 */
        int r14_off;            /* Offset of saved %r14 */
        int run_ctx_off;        /* Offset of struct bpf_tramp_run_ctx */
+       int tccnt_off;          /* Offset of saved tailcall counter */
        int do_fexit;           /* do_fexit: label */
 };
 
@@ -2258,12 +2259,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
        tjit->r14_off = alloc_stack(tjit, sizeof(u64));
        tjit->run_ctx_off = alloc_stack(tjit,
                                        sizeof(struct bpf_tramp_run_ctx));
+       tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
        /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
        tjit->stack_size -= STACK_FRAME_OVERHEAD;
        tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
 
        /* aghi %r15,-stack_size */
        EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+       /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+       _EMIT6(0xd203f000 | tjit->tccnt_off,
+              0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
        /* stmg %r2,%rN,fwd_reg_args_off(%r15) */
        if (nr_reg_args)
                EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
@@ -2400,6 +2405,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
                                       (nr_stack_args * sizeof(u64) - 1) << 16 |
                                       tjit->stack_args_off,
                               0xf000 | tjit->orig_stack_args_off);
+               /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+               _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
                /* lgr %r1,%r8 */
                EMIT4(0xb9040000, REG_1, REG_8);
                /* %r1() */
@@ -2456,6 +2463,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
        if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
                EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
                              tjit->retval_off);
+       /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+       _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+              0xf000 | tjit->tccnt_off);
        /* aghi %r15,stack_size */
        EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
        /* Emit an expoline for the following indirect jump. */
index 595ca0b..43b0ae4 100644 (file)
@@ -2,6 +2,5 @@
 generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
 generic-y += agp.h
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
diff --git a/arch/sparc/include/asm/ide.h b/arch/sparc/include/asm/ide.h
deleted file mode 100644 (file)
index 904cc6c..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* ide.h: SPARC PCI specific IDE glue.
- *
- * Copyright (C) 1997  David S. Miller (davem@davemloft.net)
- * Copyright (C) 1998  Eddie C. Dost   (ecd@skynet.be)
- * Adaptation from sparc64 version to sparc by Pete Zaitcev.
- */
-
-#ifndef _SPARC_IDE_H
-#define _SPARC_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm/io.h>
-#ifdef CONFIG_SPARC64
-#include <asm/spitfire.h>
-#include <asm/cacheflush.h>
-#include <asm/page.h>
-#else
-#include <linux/pgtable.h>
-#include <asm/psr.h>
-#endif
-
-#define __ide_insl(data_reg, buffer, wcount) \
-       __ide_insw(data_reg, buffer, (wcount)<<1)
-#define __ide_outsl(data_reg, buffer, wcount) \
-       __ide_outsw(data_reg, buffer, (wcount)<<1)
-
-/* On sparc, I/O ports and MMIO registers are accessed identically.  */
-#define __ide_mm_insw  __ide_insw
-#define __ide_mm_insl  __ide_insl
-#define __ide_mm_outsw __ide_outsw
-#define __ide_mm_outsl __ide_outsl
-
-static inline void __ide_insw(void __iomem *port, void *dst, u32 count)
-{
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-       unsigned long end = (unsigned long)dst + (count << 1);
-#endif
-       u16 *ps = dst;
-       u32 *pi;
-
-       if(((unsigned long)ps) & 0x2) {
-               *ps++ = __raw_readw(port);
-               count--;
-       }
-       pi = (u32 *)ps;
-       while(count >= 2) {
-               u32 w;
-
-               w  = __raw_readw(port) << 16;
-               w |= __raw_readw(port);
-               *pi++ = w;
-               count -= 2;
-       }
-       ps = (u16 *)pi;
-       if(count)
-               *ps++ = __raw_readw(port);
-
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-       __flush_dcache_range((unsigned long)dst, end);
-#endif
-}
-
-static inline void __ide_outsw(void __iomem *port, const void *src, u32 count)
-{
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-       unsigned long end = (unsigned long)src + (count << 1);
-#endif
-       const u16 *ps = src;
-       const u32 *pi;
-
-       if(((unsigned long)src) & 0x2) {
-               __raw_writew(*ps++, port);
-               count--;
-       }
-       pi = (const u32 *)ps;
-       while(count >= 2) {
-               u32 w;
-
-               w = *pi++;
-               __raw_writew((w >> 16), port);
-               __raw_writew(w, port);
-               count -= 2;
-       }
-       ps = (const u16 *)pi;
-       if(count)
-               __raw_writew(*ps, port);
-
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-       __flush_dcache_range((unsigned long)src, end);
-#endif
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _SPARC_IDE_H */
index a269ad2..a3fdee4 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright (C) 1997 Anton Blanchard (anton@progsoc.uts.edu.au)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <linux/errno.h>
 #include <linux/pgtable.h>
@@ -30,7 +31,6 @@
 #include <asm/unistd.h>
 
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 #define curptr      g6
 
index 6044b82..964c61b 100644 (file)
@@ -11,6 +11,7 @@
  * CompactPCI platform by Eric Brower, 1999.
  */
 
+#include <linux/export.h>
 #include <linux/version.h>
 #include <linux/init.h>
 
@@ -25,7 +26,6 @@
 #include <asm/thread_info.h>   /* TI_UWINMASK */
 #include <asm/errno.h>
 #include <asm/pgtable.h>       /* PGDIR_SHIFT */
-#include <asm/export.h>
 
        .data
 /* The following are used with the prom_vector node-ops to figure out
index 72a5bdc..cf05491 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <linux/version.h>
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <linux/threads.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
@@ -33,7 +34,6 @@
 #include <asm/estate.h>
 #include <asm/sfafsr.h>
 #include <asm/unistd.h>
-#include <asm/export.h>
 
 /* This section from from _start to sparc64_boot_end should fit into
  * 0x0000000000404000 to 0x0000000000408000.
index a6f4ee3..635398e 100644 (file)
@@ -6,10 +6,10 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
-#include <asm/export.h>
 #define GLOBAL_SPARE   g7
 #else
 #define GLOBAL_SPARE   g5
index 9c8eb20..31a0c33 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 
 #include <asm/asi.h>
@@ -14,7 +15,6 @@
 #include <asm/ptrace.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
-#include <asm/export.h>
 
        /* On entry: %o5=current FPRS value, %g7 is callers address */
        /* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
index 2d72de8..2a9e7c4 100644 (file)
@@ -6,8 +6,8 @@
  * Copyright (C) 1999 David S. Miller (davem@redhat.com)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(__ashldi3)
index 05dfda9..8fd0b31 100644 (file)
@@ -6,8 +6,8 @@
  * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(__ashrdi3)
index 8245d4a..4f8cab2 100644 (file)
@@ -4,10 +4,10 @@
  * Copyright (C) 1999, 2007 2012 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
-#include <asm/export.h>
 
        .text
 
index 9d647f9..9c91cbb 100644 (file)
@@ -4,10 +4,10 @@
  * Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
-#include <asm/export.h>
 
        .text
 
index 76ddd1f..5b92959 100644 (file)
@@ -5,9 +5,9 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
        /* Zero out 64 bytes of memory at (buf + offset).
         * Assumes %g1 contains zero.
index 87fec4c..2bfa44a 100644 (file)
@@ -5,8 +5,8 @@
  * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 
index 781e39b..84ad709 100644 (file)
@@ -14,8 +14,8 @@
  *     BSD4.4 portable checksum routine
  */
 
+#include <linux/export.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 
 #define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)        \
        ldd     [buf + offset + 0x00], t0;                      \
index 9700ef1..32b626f 100644 (file)
@@ -14,7 +14,7 @@
  *     BSD4.4 portable checksum routine
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
 csum_partial_fix_alignment:
index 302d345..e634581 100644 (file)
@@ -5,13 +5,13 @@
  * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
  */
 
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/spitfire.h>
 #include <asm/head.h>
-#include <asm/export.h>
 
        /* What we used to do was lock a TLB entry into a specific
         * TLB slot, clear the page with interrupts disabled, then
index 66e90bf..e23e6a6 100644 (file)
@@ -4,9 +4,9 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
-#include <asm/export.h>
 
 #define XCC xcc
 
index 5ebcfd4..7a041f3 100644 (file)
@@ -5,13 +5,13 @@
  * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
  */
 
+#include <linux/export.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <linux/pgtable.h>
 #include <asm/spitfire.h>
 #include <asm/head.h>
-#include <asm/export.h>
 
        /* What we used to do was lock a TLB entry into a specific
         * TLB slot, clear the page with interrupts disabled, then
index 954572c..7bb2ef6 100644 (file)
  * Returns 0 if successful, otherwise count of bytes not copied yet
  */
 
+#include <linux/export.h>
 #include <asm/ptrace.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
-#include <asm/export.h>
 
 /* Work around cpp -rob */
 #define ALLOC #alloc
index d839956..f968e83 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 #ifdef __KERNEL__
 #define GLOBAL_SPARE   %g7
index a738940..4ba901a 100644 (file)
@@ -5,7 +5,7 @@ This file is part of GNU CC.
 
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .globl __divdi3
index 5a11d86..3a9ad8f 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .register       %g2,#scratch
 
index 06b8d30..ccf97fb 100644 (file)
@@ -5,8 +5,8 @@
  * and onward.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
        .register       %g2, #scratch
index c83e22a..87005b6 100644 (file)
@@ -5,8 +5,8 @@
  * and onward.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
        .register       %g2, #scratch
index 0ddbbb0..eebee59 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
        .align  32
index 531d89c..7fa8fd4 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */
index 9a1289a..47a39f4 100644 (file)
@@ -7,11 +7,11 @@
  * Copyright (C) 1998 Jakub Jelinek   (jj@ultra.linux.cz)
  */
 
+#include <linux/export.h>
 #include <asm/ptrace.h>
 #include <asm/psr.h>
 #include <asm/smp.h>
 #include <asm/spinlock.h>
-#include <asm/export.h>
 
        .text
        .align  4
index 509ca66..09bf581 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 ENTRY(__lshrdi3)
        cmp     %o2, 0
index deba6fa..f7f7910 100644 (file)
@@ -6,8 +6,8 @@
  * This can also be tweaked for kernel stack overflow detection.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 /*
  * This is the main variant and is called by C code.  GCC's -pg option
index a18076e..c87e800 100644 (file)
@@ -5,9 +5,9 @@
  * Copyright (C) 2000, 2008 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 
        .text
 ENTRY(memcmp)
index ee823d8..57b1ae0 100644 (file)
@@ -8,7 +8,8 @@
  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
+
 #define FUNC(x)                \
        .globl  x;              \
        .type   x,@function;    \
index 3132b63..543dda7 100644 (file)
@@ -5,8 +5,8 @@
  * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(memmove) /* o0=dst o1=src o2=len */
index c4c2d5b..5386a3a 100644 (file)
@@ -5,7 +5,7 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* In essence, this is just a fancy strlen. */
 
index 36dd638..70a4f21 100644 (file)
@@ -6,7 +6,7 @@
  * Copyright (C) 1998 David S. Miller (davem@redhat.com)
  */
 
-       #include <asm/export.h>
+#include <linux/export.h>
 
 #define HI_MAGIC       0x8080808080808080
 #define LO_MAGIC       0x0101010101010101
index eaff682..a33419d 100644 (file)
@@ -9,8 +9,8 @@
  * clear_user.
  */
 
+#include <linux/export.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 
 /* Work around cpp -rob */
 #define ALLOC #alloc
index 53054de..7e1e8cd 100644 (file)
@@ -5,7 +5,7 @@ This file is part of GNU CC.
 
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .globl __muldi3
index 2f187b2..5bb4c12 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
        .align  4
index dd111bb..27478b3 100644 (file)
@@ -6,9 +6,9 @@
  * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 
 #define LO_MAGIC 0x01010101
 #define HI_MAGIC 0x80808080
index 794733f..387bbf6 100644 (file)
@@ -4,8 +4,8 @@
  *            generic strncmp routine.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(strncmp)
index 3d37d65..76c1207 100644 (file)
@@ -5,9 +5,9 @@
  * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
-#include <asm/export.h>
 
        .text
 ENTRY(strncmp)
index f6af7c7..35461e3 100644 (file)
@@ -9,12 +9,12 @@
  * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #include <asm/dcu.h>
 #include <asm/spitfire.h>
-#include <asm/export.h>
 
 /*
  *     Requirements:
index 0d41c94..b44d79d 100644 (file)
@@ -128,6 +128,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
                        goto no_cache_flush;
 
                /* A real file page? */
+               folio = page_folio(page);
                mapping = folio_flush_mapping(folio);
                if (!mapping)
                        goto no_cache_flush;
index 95315d3..5bfe5ca 100644 (file)
@@ -335,9 +335,5 @@ define archhelp
   echo  '                        bzdisk/fdimage*/hdimage/isoimage also accept:'
   echo  '                        FDARGS="..."  arguments for the booted kernel'
   echo  '                        FDINITRD=file initrd for the booted kernel'
-  echo  ''
-  echo  '  kvm_guest.config    - Enable Kconfig items for running this kernel as a KVM guest'
-  echo  '  xen.config          - Enable Kconfig items for running this kernel as a Xen guest'
-  echo  '  x86_debug.config    - Enable tip tree debugging options for testing'
 
 endef
index 2061ed1..58cb949 100644 (file)
 #define X86_FEATURE_SEV_ES             (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
 #define X86_FEATURE_V_TSC_AUX          (19*32+ 9) /* "" Virtual TSC_AUX */
 #define X86_FEATURE_SME_COHERENT       (19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP         (19*32+14) /* AMD SEV-ES full debug state swap support */
 
 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP  (20*32+ 0) /* "" No Nested Data Breakpoints */
index 3be6a98..c9f6a6c 100644 (file)
@@ -205,8 +205,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
 #endif
 #endif
 
-typedef void crash_vmclear_fn(void);
-extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
 extern void kdump_nmi_shootdown_cpus(void);
 
 #ifdef CONFIG_CRASH_HOTPLUG
index 3bc146d..1a4def3 100644 (file)
@@ -288,13 +288,13 @@ struct kvm_kernel_irq_routing_entry;
  * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
  * also includes TDP pages) to determine whether or not a page can be used in
  * the given MMU context.  This is a subset of the overall kvm_cpu_role to
- * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
- * 2 bytes per gfn instead of 4 bytes per gfn.
+ * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
+ * allocating 2 bytes per gfn instead of 4 bytes per gfn.
  *
  * Upper-level shadow pages having gptes are tracked for write-protection via
- * gfn_track.  As above, gfn_track is a 16 bit counter, so KVM must not create
- * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
- * gfn_track will overflow and explosions will ensure.
+ * gfn_write_track.  As above, gfn_write_track is a 16 bit counter, so KVM must
+ * not create more than 2^16-1 upper-level shadow pages at a single gfn,
+ * otherwise gfn_write_track will overflow and explosions will ensue.
  *
  * A unique shadow page (SP) for a gfn is created if and only if an existing SP
  * cannot be reused.  The ability to reuse a SP is tracked by its role, which
@@ -746,7 +746,6 @@ struct kvm_vcpu_arch {
        u64 smi_count;
        bool at_instruction_boundary;
        bool tpr_access_reporting;
-       bool xsaves_enabled;
        bool xfd_no_write_intercept;
        u64 ia32_xss;
        u64 microcode_version;
@@ -831,6 +830,25 @@ struct kvm_vcpu_arch {
        struct kvm_cpuid_entry2 *cpuid_entries;
        struct kvm_hypervisor_cpuid kvm_cpuid;
 
+       /*
+        * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
+        * when "struct kvm_vcpu_arch" is no longer defined in an
+        * arch/x86/include/asm header.  The max is mostly arbitrary, i.e.
+        * can be increased as necessary.
+        */
+#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
+
+       /*
+        * Track whether or not the guest is allowed to use features that are
+        * governed by KVM, where "governed" means KVM needs to manage state
+        * and/or explicitly enable the feature in hardware.  Typically, but
+        * not always, governed features can be used by the guest if and only
+        * if both KVM and userspace want to expose the feature to the guest.
+        */
+       struct {
+               DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
+       } governed_features;
+
        u64 reserved_gpa_bits;
        int maxphyaddr;
 
@@ -1005,7 +1023,7 @@ struct kvm_lpage_info {
 struct kvm_arch_memory_slot {
        struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
        struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
-       unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+       unsigned short *gfn_write_track;
 };
 
 /*
@@ -1247,8 +1265,9 @@ struct kvm_arch {
         * create an NX huge page (without hanging the guest).
         */
        struct list_head possible_nx_huge_pages;
-       struct kvm_page_track_notifier_node mmu_sp_tracker;
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
        struct kvm_page_track_notifier_head track_notifier_head;
+#endif
        /*
         * Protects marking pages unsync during page faults, as TDP MMU page
         * faults only take mmu_lock for read.  For simplicity, the unsync
@@ -1655,8 +1674,8 @@ struct kvm_x86_ops {
 
        u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
        u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
-       void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
-       void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
+       void (*write_tsc_offset)(struct kvm_vcpu *vcpu);
+       void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
 
        /*
         * Retrieve somewhat arbitrary exit information.  Intended to
@@ -1795,8 +1814,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
 #define __KVM_HAVE_ARCH_VM_FREE
 void kvm_arch_free_vm(struct kvm *kvm);
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        if (kvm_x86_ops.flush_remote_tlbs &&
            !static_call(kvm_x86_flush_remote_tlbs)(kvm))
@@ -1805,6 +1824,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
                return -ENOTSUPP;
 }
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 #define kvm_arch_pmi_in_guest(vcpu) \
        ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
 
@@ -1833,7 +1854,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot);
-void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
index eb186bc..3d04074 100644 (file)
@@ -2,11 +2,9 @@
 #ifndef _ASM_X86_KVM_PAGE_TRACK_H
 #define _ASM_X86_KVM_PAGE_TRACK_H
 
-enum kvm_page_track_mode {
-       KVM_PAGE_TRACK_WRITE,
-       KVM_PAGE_TRACK_MAX,
-};
+#include <linux/kvm_types.h>
 
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 /*
  * The notifier represented by @kvm_page_track_notifier_node is linked into
  * the head which will be notified when guest is triggering the track event.
@@ -26,54 +24,39 @@ struct kvm_page_track_notifier_node {
         * It is called when guest is writing the write-tracked page
         * and write emulation is finished at that time.
         *
-        * @vcpu: the vcpu where the write access happened.
         * @gpa: the physical address written by guest.
         * @new: the data that was written to the address.
         * @bytes: the written length.
         * @node: this node
         */
-       void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-                           int bytes, struct kvm_page_track_notifier_node *node);
+       void (*track_write)(gpa_t gpa, const u8 *new, int bytes,
+                           struct kvm_page_track_notifier_node *node);
+
        /*
-        * It is called when memory slot is being moved or removed
-        * users can drop write-protection for the pages in that memory slot
+        * Invoked when a memory region is removed from the guest.  Or in KVM
+        * terms, when a memslot is deleted.
         *
-        * @kvm: the kvm where memory slot being moved or removed
-        * @slot: the memory slot being moved or removed
-        * @node: this node
+        * @gfn:       base gfn of the region being removed
+        * @nr_pages:  number of pages in the to-be-removed region
+        * @node:      this node
         */
-       void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
-                           struct kvm_page_track_notifier_node *node);
+       void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages,
+                                   struct kvm_page_track_notifier_node *node);
 };
 
-int kvm_page_track_init(struct kvm *kvm);
-void kvm_page_track_cleanup(struct kvm *kvm);
+int kvm_page_track_register_notifier(struct kvm *kvm,
+                                    struct kvm_page_track_notifier_node *n);
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+                                       struct kvm_page_track_notifier_node *n);
 
-bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
-int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
-
-void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
-int kvm_page_track_create_memslot(struct kvm *kvm,
-                                 struct kvm_memory_slot *slot,
-                                 unsigned long npages);
-
-void kvm_slot_page_track_add_page(struct kvm *kvm,
-                                 struct kvm_memory_slot *slot, gfn_t gfn,
-                                 enum kvm_page_track_mode mode);
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
-                                    struct kvm_memory_slot *slot, gfn_t gfn,
-                                    enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm *kvm,
-                                  const struct kvm_memory_slot *slot,
-                                  gfn_t gfn, enum kvm_page_track_mode mode);
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
+#else
+/*
+ * Allow defining a node in a structure even if page tracking is disabled, e.g.
+ * to play nice with testing headers via direct inclusion from the command line.
+ */
+struct kvm_page_track_notifier_node {};
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
 
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
-                                struct kvm_page_track_notifier_node *n);
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
-                                  struct kvm_page_track_notifier_node *n);
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-                         int bytes);
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
 #endif
index 9177b43..6536873 100644 (file)
@@ -25,7 +25,14 @@ void __noreturn machine_real_restart(unsigned int type);
 #define MRR_BIOS       0
 #define MRR_APM                1
 
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+typedef void (cpu_emergency_virt_cb)(void);
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
 void cpu_emergency_disable_virtualization(void);
+#else
+static inline void cpu_emergency_disable_virtualization(void) {}
+#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
 
 typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
 void nmi_shootdown_cpus(nmi_shootdown_cb callback);
index e7c7379..19bf955 100644 (file)
@@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
 
 #define AVIC_HPA_MASK  ~((0xFFFULL << 52) | 0xFFF)
 
+#define SVM_SEV_FEAT_DEBUG_SWAP                        BIT(5)
 
 struct vmcb_seg {
        u16 selector;
@@ -345,7 +346,7 @@ struct vmcb_save_area {
        u64 last_excp_from;
        u64 last_excp_to;
        u8 reserved_0x298[72];
-       u32 spec_ctrl;          /* Guest version of SPEC_CTRL at 0x2E0 */
+       u64 spec_ctrl;          /* Guest version of SPEC_CTRL at 0x2E0 */
 } __packed;
 
 /* Save area definition for SEV-ES and SEV-SNP guests */
@@ -512,7 +513,7 @@ struct ghcb {
 } __packed;
 
 
-#define EXPECTED_VMCB_SAVE_AREA_SIZE           740
+#define EXPECTED_VMCB_SAVE_AREA_SIZE           744
 #define EXPECTED_GHCB_SAVE_AREA_SIZE           1032
 #define EXPECTED_SEV_ES_SAVE_AREA_SIZE         1648
 #define EXPECTED_VMCB_CONTROL_AREA_SIZE                1024
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
deleted file mode 100644 (file)
index 3b12e6b..0000000
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* CPU virtualization extensions handling
- *
- * This should carry the code for handling CPU virtualization extensions
- * that needs to live in the kernel core.
- *
- * Author: Eduardo Habkost <ehabkost@redhat.com>
- *
- * Copyright (C) 2008, Red Hat Inc.
- *
- * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
- */
-#ifndef _ASM_X86_VIRTEX_H
-#define _ASM_X86_VIRTEX_H
-
-#include <asm/processor.h>
-
-#include <asm/vmx.h>
-#include <asm/svm.h>
-#include <asm/tlbflush.h>
-
-/*
- * VMX functions:
- */
-
-static inline int cpu_has_vmx(void)
-{
-       unsigned long ecx = cpuid_ecx(1);
-       return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
-}
-
-
-/**
- * cpu_vmxoff() - Disable VMX on the current CPU
- *
- * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
- *
- * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
- * atomically track post-VMXON state, e.g. this may be called in NMI context.
- * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
- * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
- * magically in RM, VM86, compat mode, or at CPL>0.
- */
-static inline int cpu_vmxoff(void)
-{
-       asm_volatile_goto("1: vmxoff\n\t"
-                         _ASM_EXTABLE(1b, %l[fault])
-                         ::: "cc", "memory" : fault);
-
-       cr4_clear_bits(X86_CR4_VMXE);
-       return 0;
-
-fault:
-       cr4_clear_bits(X86_CR4_VMXE);
-       return -EIO;
-}
-
-static inline int cpu_vmx_enabled(void)
-{
-       return __read_cr4() & X86_CR4_VMXE;
-}
-
-/** Disable VMX if it is enabled on the current CPU
- *
- * You shouldn't call this if cpu_has_vmx() returns 0.
- */
-static inline void __cpu_emergency_vmxoff(void)
-{
-       if (cpu_vmx_enabled())
-               cpu_vmxoff();
-}
-
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
-       if (cpu_has_vmx())
-               __cpu_emergency_vmxoff();
-}
-
-
-
-
-/*
- * SVM functions:
- */
-
-/** Check if the CPU has SVM support
- *
- * You can use the 'msg' arg to get a message describing the problem,
- * if the function returns zero. Simply pass NULL if you are not interested
- * on the messages; gcc should take care of not generating code for
- * the messages on this case.
- */
-static inline int cpu_has_svm(const char **msg)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
-           boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) {
-               if (msg)
-                       *msg = "not amd or hygon";
-               return 0;
-       }
-
-       if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
-               if (msg)
-                       *msg = "can't execute cpuid_8000000a";
-               return 0;
-       }
-
-       if (!boot_cpu_has(X86_FEATURE_SVM)) {
-               if (msg)
-                       *msg = "svm not available";
-               return 0;
-       }
-       return 1;
-}
-
-
-/** Disable SVM on the current CPU
- *
- * You should call this only if cpu_has_svm() returned true.
- */
-static inline void cpu_svm_disable(void)
-{
-       uint64_t efer;
-
-       wrmsrl(MSR_VM_HSAVE_PA, 0);
-       rdmsrl(MSR_EFER, efer);
-       if (efer & EFER_SVME) {
-               /*
-                * Force GIF=1 prior to disabling SVM to ensure INIT and NMI
-                * aren't blocked, e.g. if a fatal error occurred between CLGI
-                * and STGI.  Note, STGI may #UD if SVM is disabled from NMI
-                * context between reading EFER and executing STGI.  In that
-                * case, GIF must already be set, otherwise the NMI would have
-                * been blocked, so just eat the fault.
-                */
-               asm_volatile_goto("1: stgi\n\t"
-                                 _ASM_EXTABLE(1b, %l[fault])
-                                 ::: "memory" : fault);
-fault:
-               wrmsrl(MSR_EFER, efer & ~EFER_SVME);
-       }
-}
-
-/** Makes sure SVM is disabled, if it is supported on the CPU
- */
-static inline void cpu_emergency_svm_disable(void)
-{
-       if (cpu_has_svm(NULL))
-               cpu_svm_disable();
-}
-
-#endif /* _ASM_X86_VIRTEX_H */
index 0d02c4a..0e73616 100644 (file)
@@ -71,7 +71,7 @@
 #define SECONDARY_EXEC_RDSEED_EXITING          VMCS_CONTROL_BIT(RDSEED_EXITING)
 #define SECONDARY_EXEC_ENABLE_PML               VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
 #define SECONDARY_EXEC_PT_CONCEAL_VMX          VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
-#define SECONDARY_EXEC_XSAVES                  VMCS_CONTROL_BIT(XSAVES)
+#define SECONDARY_EXEC_ENABLE_XSAVES           VMCS_CONTROL_BIT(XSAVES)
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC     VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
 #define SECONDARY_EXEC_PT_USE_GPA              VMCS_CONTROL_BIT(PT_USE_GPA)
 #define SECONDARY_EXEC_TSC_SCALING              VMCS_CONTROL_BIT(TSC_SCALING)
index 587c774..c92d886 100644 (file)
@@ -48,27 +48,6 @@ struct crash_memmap_data {
        unsigned int type;
 };
 
-/*
- * This is used to VMCLEAR all VMCSs loaded on the
- * processor. And when loading kvm_intel module, the
- * callback function pointer will be assigned.
- *
- * protected by rcu.
- */
-crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
-EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-
-static inline void cpu_crash_vmclear_loaded_vmcss(void)
-{
-       crash_vmclear_fn *do_vmclear_operation = NULL;
-
-       rcu_read_lock();
-       do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
-       if (do_vmclear_operation)
-               do_vmclear_operation();
-       rcu_read_unlock();
-}
-
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -76,11 +55,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
        crash_save_cpu(regs, cpu);
 
        /*
-        * VMCLEAR VMCSs loaded on all cpus if needed.
-        */
-       cpu_crash_vmclear_loaded_vmcss();
-
-       /*
         * Disable Intel PT to stop its logging
         */
        cpu_emergency_stop_pt();
@@ -133,11 +107,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 
        crash_smp_send_stop();
 
-       /*
-        * VMCLEAR VMCSs loaded on this cpu if needed.
-        */
-       cpu_crash_vmclear_loaded_vmcss();
-
        cpu_emergency_disable_virtualization();
 
        /*
index 3adbe97..830425e 100644 (file)
@@ -22,7 +22,6 @@
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
 #include <asm/pci_x86.h>
-#include <asm/virtext.h>
 #include <asm/cpu.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
@@ -530,9 +529,54 @@ static inline void kb_wait(void)
 
 static inline void nmi_shootdown_cpus_on_restart(void);
 
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+/* RCU-protected callback to disable virtualization prior to reboot. */
+static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
+
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
+{
+       if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback)))
+               return;
+
+       rcu_assign_pointer(cpu_emergency_virt_callback, callback);
+}
+EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback);
+
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
+{
+       if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback))
+               return;
+
+       rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
+       synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
+
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+       cpu_emergency_virt_cb *callback;
+
+       /*
+        * IRQs must be disabled as KVM enables virtualization in hardware via
+        * function call IPIs, i.e. IRQs need to be disabled to guarantee
+        * virtualization stays disabled.
+        */
+       lockdep_assert_irqs_disabled();
+
+       rcu_read_lock();
+       callback = rcu_dereference(cpu_emergency_virt_callback);
+       if (callback)
+               callback();
+       rcu_read_unlock();
+}
+
 static void emergency_reboot_disable_virtualization(void)
 {
-       /* Just make sure we won't change CPUs while doing this */
        local_irq_disable();
 
        /*
@@ -545,7 +589,7 @@ static void emergency_reboot_disable_virtualization(void)
         * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
         * other CPUs may have virtualization enabled.
         */
-       if (cpu_has_vmx() || cpu_has_svm(NULL)) {
+       if (rcu_access_pointer(cpu_emergency_virt_callback)) {
                /* Safely force _this_ CPU out of VMX/SVM operation. */
                cpu_emergency_disable_virtualization();
 
@@ -553,7 +597,9 @@ static void emergency_reboot_disable_virtualization(void)
                nmi_shootdown_cpus_on_restart();
        }
 }
-
+#else
+static void emergency_reboot_disable_virtualization(void) { }
+#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
 
 void __attribute__((weak)) mach_reboot_fixups(void)
 {
@@ -787,21 +833,9 @@ void machine_crash_shutdown(struct pt_regs *regs)
 }
 #endif
 
-
 /* This is the CPU performing the emergency shutdown work. */
 int crashing_cpu = -1;
 
-/*
- * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
- * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
- * GIF=0, i.e. if the crash occurred between CLGI and STGI.
- */
-void cpu_emergency_disable_virtualization(void)
-{
-       cpu_emergency_vmxoff();
-       cpu_emergency_svm_disable();
-}
-
 #if defined(CONFIG_SMP)
 
 static nmi_shootdown_cb shootdown_callback;
index 89ca7f4..ed90f14 100644 (file)
@@ -101,7 +101,7 @@ config X86_SGX_KVM
 
 config KVM_AMD
        tristate "KVM for AMD processors support"
-       depends on KVM
+       depends on KVM && (CPU_SUP_AMD || CPU_SUP_HYGON)
        help
          Provides support for KVM on AMD processors equipped with the AMD-V
          (SVM) extensions.
@@ -138,6 +138,19 @@ config KVM_XEN
 
          If in doubt, say "N".
 
+config KVM_PROVE_MMU
+       bool "Prove KVM MMU correctness"
+       depends on DEBUG_KERNEL
+       depends on KVM
+       depends on EXPERT
+       help
+         Enables runtime assertions in KVM's MMU that are too costly to enable
+         in anything remotely resembling a production environment, e.g. this
+         gates code that verifies a to-be-freed page table doesn't have any
+         present SPTEs.
+
+         If in doubt, say "N".
+
 config KVM_EXTERNAL_WRITE_TRACKING
        bool
 
index d343268..0544e30 100644 (file)
@@ -11,6 +11,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kvm_host.h>
+#include "linux/lockdep.h"
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
@@ -84,6 +85,18 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
        struct kvm_cpuid_entry2 *e;
        int i;
 
+       /*
+        * KVM has a semi-arbitrary rule that querying the guest's CPUID model
+        * with IRQs disabled is disallowed.  The CPUID model can legitimately
+        * have over one hundred entries, i.e. the lookup is slow, and IRQs are
+        * typically disabled in KVM only when KVM is in a performance critical
+        * path, e.g. the core VM-Enter/VM-Exit run loop.  Nothing will break
+        * if this rule is violated, this assertion is purely to flag potential
+        * performance issues.  If this fires, consider moving the lookup out
+        * of the hotpath, e.g. by caching information during CPUID updates.
+        */
+       lockdep_assert_irqs_enabled();
+
        for (i = 0; i < nent; i++) {
                e = &entries[i];
 
@@ -312,6 +325,27 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
        struct kvm_cpuid_entry2 *best;
+       bool allow_gbpages;
+
+       BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
+       bitmap_zero(vcpu->arch.governed_features.enabled,
+                   KVM_MAX_NR_GOVERNED_FEATURES);
+
+       /*
+        * If TDP is enabled, let the guest use GBPAGES if they're supported in
+        * hardware.  The hardware page walker doesn't let KVM disable GBPAGES,
+        * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
+        * walk for performance and complexity reasons.  Not to mention KVM
+        * _can't_ solve the problem because GVA->GPA walks aren't visible to
+        * KVM once a TDP translation is installed.  Mimic hardware behavior so
+        * that KVM is at least consistent, i.e. doesn't randomly inject #PF.
+        * If TDP is disabled, honor *only* guest CPUID as KVM has full control
+        * and can install smaller shadow pages if the host lacks 1GiB support.
+        */
+       allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
+                                     guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
+       if (allow_gbpages)
+               kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
 
        best = kvm_find_cpuid_entry(vcpu, 1);
        if (best && apic) {
@@ -647,7 +681,8 @@ void kvm_set_cpu_caps(void)
        );
 
        kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
-               F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI)
+               F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
+               F(AMX_COMPLEX)
        );
 
        kvm_cpu_cap_mask(CPUID_D_1_EAX,
@@ -1154,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                cpuid_entry_override(entry, CPUID_8000_0001_EDX);
                cpuid_entry_override(entry, CPUID_8000_0001_ECX);
                break;
+       case 0x80000005:
+               /* Pass host L1 cache and TLB info. */
+               break;
        case 0x80000006:
                /* Drop reserved bits, pass host L2 cache and TLB info. */
                entry->edx &= ~GENMASK(17, 16);
index b1658c0..284fa47 100644 (file)
@@ -232,4 +232,50 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
        return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
 }
 
+enum kvm_governed_features {
+#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x,
+#include "governed_features.h"
+       KVM_NR_GOVERNED_FEATURES
+};
+
+static __always_inline int kvm_governed_feature_index(unsigned int x86_feature)
+{
+       switch (x86_feature) {
+#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x;
+#include "governed_features.h"
+       default:
+               return -1;
+       }
+}
+
+static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature)
+{
+       return kvm_governed_feature_index(x86_feature) >= 0;
+}
+
+static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu,
+                                                    unsigned int x86_feature)
+{
+       BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+
+       __set_bit(kvm_governed_feature_index(x86_feature),
+                 vcpu->arch.governed_features.enabled);
+}
+
+static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
+                                                              unsigned int x86_feature)
+{
+       if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
+               kvm_governed_feature_set(vcpu, x86_feature);
+}
+
+static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
+                                         unsigned int x86_feature)
+{
+       BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+
+       return test_bit(kvm_governed_feature_index(x86_feature),
+                       vcpu->arch.governed_features.enabled);
+}
+
 #endif
index 936a397..2673cd5 100644 (file)
@@ -1799,13 +1799,11 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
                                               op->addr.mem,
                                               &op->val,
                                               op->bytes);
-               break;
        case OP_MEM_STR:
                return segmented_write(ctxt,
                                       op->addr.mem,
                                       op->data,
                                       op->bytes * op->count);
-               break;
        case OP_XMM:
                kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
                break;
diff --git a/arch/x86/kvm/governed_features.h b/arch/x86/kvm/governed_features.h
new file mode 100644 (file)
index 0000000..423a733
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE)
+BUILD_BUG() /* includer broke the contract checked by the #if above */
+#endif /* need KVM_GOVERNED_FEATURE defined, KVM_GOVERNED_X86_FEATURE not */
+
+#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x)
+
+KVM_GOVERNED_X86_FEATURE(GBPAGES)
+KVM_GOVERNED_X86_FEATURE(XSAVES)
+KVM_GOVERNED_X86_FEATURE(VMX)
+KVM_GOVERNED_X86_FEATURE(NRIPS)
+KVM_GOVERNED_X86_FEATURE(TSCRATEMSR)
+KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD)
+KVM_GOVERNED_X86_FEATURE(LBRV)
+KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
+KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
+KVM_GOVERNED_X86_FEATURE(VGIF)
+KVM_GOVERNED_X86_FEATURE(VNMI)
+
+#undef KVM_GOVERNED_X86_FEATURE
+#undef KVM_GOVERNED_FEATURE /* each includer must define it again first */
index b28fd02..7c2dac6 100644 (file)
@@ -1293,7 +1293,6 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
        case HV_X64_MSR_VP_ASSIST_PAGE:
                return hv_vcpu->cpuid_cache.features_eax &
                        HV_MSR_APIC_ACCESS_AVAILABLE;
-               break;
        case HV_X64_MSR_TSC_FREQUENCY:
        case HV_X64_MSR_APIC_FREQUENCY:
                return hv_vcpu->cpuid_cache.features_eax &
index ab65f3a..be7aeb9 100644 (file)
@@ -213,7 +213,6 @@ struct x86_emulate_ops {
 
        bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
                          u32 *ecx, u32 *edx, bool exact_only);
-       bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
        bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
        bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
        bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
index a983a16..dcd60b3 100644 (file)
@@ -376,7 +376,8 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
        struct kvm_vcpu *vcpu;
        unsigned long i;
        u32 max_id = 255; /* enough space for any xAPIC ID */
-       bool xapic_id_mismatch = false;
+       bool xapic_id_mismatch;
+       int r;
 
        /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
        if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
@@ -386,9 +387,14 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
                  "Dirty APIC map without an in-kernel local APIC");
 
        mutex_lock(&kvm->arch.apic_map_lock);
+
+retry:
        /*
-        * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
-        * (if clean) or the APIC registers (if dirty).
+        * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
+        * or the APIC registers (if dirty).  Note, on retry the map may not
+        * yet have been marked dirty by whatever task changed a vCPU's x2APIC
+        * ID, i.e. the map may still show up as in-progress.  In that case
+        * this task still needs to retry and complete its calculation.
         */
        if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
                                   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
@@ -397,6 +403,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
                return;
        }
 
+       /*
+        * Reset the mismatch flag between attempts so that KVM does the right
+        * thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
+        * keep max_id strictly increasing.  Disallowing max_id from shrinking
+        * ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
+        * with the highest x2APIC ID is toggling its APIC on and off.
+        */
+       xapic_id_mismatch = false;
+
        kvm_for_each_vcpu(i, vcpu, kvm)
                if (kvm_apic_present(vcpu))
                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
@@ -415,9 +430,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
                if (!kvm_apic_present(vcpu))
                        continue;
 
-               if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) {
+               r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
+               if (r) {
                        kvfree(new);
                        new = NULL;
+                       if (r == -E2BIG) {
+                               cond_resched();
+                               goto retry;
+                       }
+
                        goto out;
                }
 
index 92d5a19..253fb20 100644 (file)
@@ -121,6 +121,8 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+                        int bytes);
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
index ec169f5..e1d011c 100644 (file)
@@ -25,6 +25,7 @@
 #include "kvm_cache_regs.h"
 #include "smm.h"
 #include "kvm_emulate.h"
+#include "page_track.h"
 #include "cpuid.h"
 #include "spte.h"
 
@@ -53,7 +54,7 @@
 #include <asm/io.h>
 #include <asm/set_memory.h>
 #include <asm/vmx.h>
-#include <asm/kvm_page_track.h>
+
 #include "trace.h"
 
 extern bool itlb_multihit_kvm_mitigation;
@@ -115,11 +116,6 @@ static int max_huge_page_level __read_mostly;
 static int tdp_root_level __read_mostly;
 static int max_tdp_level __read_mostly;
 
-#ifdef MMU_DEBUG
-bool dbg = 0;
-module_param(dbg, bool, 0644);
-#endif
-
 #define PTE_PREFETCH_NUM               8
 
 #include <trace/events/kvm.h>
@@ -278,16 +274,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void)
        return kvm_x86_ops.flush_remote_tlbs_range;
 }
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-                                gfn_t nr_pages)
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
 {
-       int ret = -EOPNOTSUPP;
+       if (!kvm_x86_ops.flush_remote_tlbs_range)
+               return -EOPNOTSUPP; /* no range-flush hook is set */
 
-       if (kvm_x86_ops.flush_remote_tlbs_range)
-               ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
-                                                                  nr_pages);
-       if (ret)
-               kvm_flush_remote_tlbs(kvm);
+       return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
 }
 
 static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@@ -490,7 +482,7 @@ retry:
  */
 static void mmu_spte_set(u64 *sptep, u64 new_spte)
 {
-       WARN_ON(is_shadow_present_pte(*sptep));
+       WARN_ON_ONCE(is_shadow_present_pte(*sptep));
        __set_spte(sptep, new_spte);
 }
 
@@ -502,7 +494,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
 {
        u64 old_spte = *sptep;
 
-       WARN_ON(!is_shadow_present_pte(new_spte));
+       WARN_ON_ONCE(!is_shadow_present_pte(new_spte));
        check_spte_writable_invariants(new_spte);
 
        if (!is_shadow_present_pte(old_spte)) {
@@ -515,7 +507,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
        else
                old_spte = __update_clear_spte_slow(sptep, new_spte);
 
-       WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
+       WARN_ON_ONCE(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
 
        return old_spte;
 }
@@ -597,7 +589,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
         * by a refcounted page, the refcount is elevated.
         */
        page = kvm_pfn_to_refcounted_page(pfn);
-       WARN_ON(page && !page_count(page));
+       WARN_ON_ONCE(page && !page_count(page));
 
        if (is_accessed_spte(old_spte))
                kvm_set_pfn_accessed(pfn);
@@ -812,7 +804,7 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
        for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
                linfo = lpage_info_slot(gfn, slot, i);
                linfo->disallow_lpage += count;
-               WARN_ON(linfo->disallow_lpage < 0);
+               WARN_ON_ONCE(linfo->disallow_lpage < 0);
        }
 }
 
@@ -839,8 +831,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 
        /* the non-leaf shadow pages are keeping readonly. */
        if (sp->role.level > PG_LEVEL_4K)
-               return kvm_slot_page_track_add_page(kvm, slot, gfn,
-                                                   KVM_PAGE_TRACK_WRITE);
+               return __kvm_write_track_add_gfn(kvm, slot, gfn);
 
        kvm_mmu_gfn_disallow_lpage(slot, gfn);
 
@@ -886,8 +877,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
        slots = kvm_memslots_for_spte_role(kvm, sp->role);
        slot = __gfn_to_memslot(slots, gfn);
        if (sp->role.level > PG_LEVEL_4K)
-               return kvm_slot_page_track_remove_page(kvm, slot, gfn,
-                                                      KVM_PAGE_TRACK_WRITE);
+               return __kvm_write_track_remove_gfn(kvm, slot, gfn);
 
        kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
@@ -941,10 +931,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
        int count = 0;
 
        if (!rmap_head->val) {
-               rmap_printk("%p %llx 0->1\n", spte, *spte);
                rmap_head->val = (unsigned long)spte;
        } else if (!(rmap_head->val & 1)) {
-               rmap_printk("%p %llx 1->many\n", spte, *spte);
                desc = kvm_mmu_memory_cache_alloc(cache);
                desc->sptes[0] = (u64 *)rmap_head->val;
                desc->sptes[1] = spte;
@@ -953,7 +941,6 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
                rmap_head->val = (unsigned long)desc | 1;
                ++count;
        } else {
-               rmap_printk("%p %llx many->many\n", spte, *spte);
                desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
                count = desc->tail_count + desc->spte_count;
 
@@ -973,7 +960,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
        return count;
 }
 
-static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
+static void pte_list_desc_remove_entry(struct kvm *kvm,
+                                      struct kvm_rmap_head *rmap_head,
                                       struct pte_list_desc *desc, int i)
 {
        struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
@@ -984,7 +972,7 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
         * when adding an entry and the previous head is full, and heads are
         * removed (this flow) when they become empty.
         */
-       BUG_ON(j < 0);
+       KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm);
 
        /*
         * Replace the to-be-freed SPTE with the last valid entry from the head
@@ -1009,35 +997,34 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
        mmu_free_pte_list_desc(head_desc);
 }
 
-static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
+static void pte_list_remove(struct kvm *kvm, u64 *spte,
+                           struct kvm_rmap_head *rmap_head)
 {
        struct pte_list_desc *desc;
        int i;
 
-       if (!rmap_head->val) {
-               pr_err("%s: %p 0->BUG\n", __func__, spte);
-               BUG();
-       } else if (!(rmap_head->val & 1)) {
-               rmap_printk("%p 1->0\n", spte);
-               if ((u64 *)rmap_head->val != spte) {
-                       pr_err("%s:  %p 1->BUG\n", __func__, spte);
-                       BUG();
-               }
+       if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
+               return; /* empty list: nothing to remove */
+
+       if (!(rmap_head->val & 1)) { /* bit 0 clear: val is one SPTE pointer */
+               if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
+                       return; /* the lone entry is not @spte */
+
                rmap_head->val = 0;
        } else {
-               rmap_printk("%p many->many\n", spte);
                desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
                while (desc) {
                        for (i = 0; i < desc->spte_count; ++i) {
                                if (desc->sptes[i] == spte) {
-                                       pte_list_desc_remove_entry(rmap_head, desc, i);
+                                       pte_list_desc_remove_entry(kvm, rmap_head,
+                                                                  desc, i);
                                        return;
                                }
                        }
                        desc = desc->more;
                }
-               pr_err("%s: %p many->many\n", __func__, spte);
-               BUG();
+
+               KVM_BUG_ON_DATA_CORRUPTION(true, kvm); /* @spte is in no descriptor */
        }
 }
 
@@ -1045,7 +1032,7 @@ static void kvm_zap_one_rmap_spte(struct kvm *kvm,
                                  struct kvm_rmap_head *rmap_head, u64 *sptep)
 {
        mmu_spte_clear_track_bits(kvm, sptep);
-       pte_list_remove(sptep, rmap_head);
+       pte_list_remove(kvm, sptep, rmap_head);
 }
 
 /* Return true if at least one SPTE was zapped, false otherwise */
@@ -1120,7 +1107,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
        slot = __gfn_to_memslot(slots, gfn);
        rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
 
-       pte_list_remove(spte, rmap_head);
+       pte_list_remove(kvm, spte, rmap_head);
 }
 
 /*
@@ -1212,7 +1199,7 @@ static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush)
        struct kvm_mmu_page *sp;
 
        sp = sptep_to_sp(sptep);
-       WARN_ON(sp->role.level == PG_LEVEL_4K);
+       WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K);
 
        drop_spte(kvm, sptep);
 
@@ -1241,8 +1228,6 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
            !(pt_protect && is_mmu_writable_spte(spte)))
                return false;
 
-       rmap_printk("spte %p %llx\n", sptep, *sptep);
-
        if (pt_protect)
                spte &= ~shadow_mmu_writable_mask;
        spte = spte & ~PT_WRITABLE_MASK;
@@ -1267,9 +1252,7 @@ static bool spte_clear_dirty(u64 *sptep)
 {
        u64 spte = *sptep;
 
-       rmap_printk("spte %p %llx\n", sptep, *sptep);
-
-       MMU_WARN_ON(!spte_ad_enabled(spte));
+       KVM_MMU_WARN_ON(!spte_ad_enabled(spte));
        spte &= ~shadow_dirty_mask;
        return mmu_spte_update(sptep, spte);
 }
@@ -1475,14 +1458,11 @@ static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
        u64 new_spte;
        kvm_pfn_t new_pfn;
 
-       WARN_ON(pte_huge(pte));
+       WARN_ON_ONCE(pte_huge(pte));
        new_pfn = pte_pfn(pte);
 
 restart:
        for_each_rmap_spte(rmap_head, &iter, sptep) {
-               rmap_printk("spte %p %llx gfn %llx (%d)\n",
-                           sptep, *sptep, gfn, level);
-
                need_flush = true;
 
                if (pte_write(pte)) {
@@ -1588,7 +1568,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
        for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
                                 range->start, range->end - 1, &iterator)
                ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
-                              iterator.level, range->pte);
+                              iterator.level, range->arg.pte);
 
        return ret;
 }
@@ -1710,21 +1690,19 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
        return young;
 }
 
-#ifdef MMU_DEBUG
-static int is_empty_shadow_page(u64 *spt)
+static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp)
 {
-       u64 *pos;
-       u64 *end;
+#ifdef CONFIG_KVM_PROVE_MMU
+       int i;
 
-       for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++)
-               if (is_shadow_present_pte(*pos)) {
-                       printk(KERN_ERR "%s: %p %llx\n", __func__,
-                              pos, *pos);
-                       return 0;
-               }
-       return 1;
-}
+       for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
+               if (KVM_MMU_WARN_ON(is_shadow_present_pte(sp->spt[i])))
+                       pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free",
+                                          sp->spt[i], &sp->spt[i],
+                                          kvm_mmu_page_get_gfn(sp, i));
+       }
 #endif
+}
 
 /*
  * This value is the sum of all of the kvm instances's
@@ -1752,7 +1730,8 @@ static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
 {
-       MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
+       kvm_mmu_check_sptes_at_free(sp);
+
        hlist_del(&sp->hash_link);
        list_del(&sp->link);
        free_page((unsigned long)sp->spt);
@@ -1775,16 +1754,16 @@ static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
        pte_list_add(cache, parent_pte, &sp->parent_ptes);
 }
 
-static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
+static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
                                       u64 *parent_pte)
 {
-       pte_list_remove(parent_pte, &sp->parent_ptes);
+       pte_list_remove(kvm, parent_pte, &sp->parent_ptes);
 }
 
-static void drop_parent_pte(struct kvm_mmu_page *sp,
+static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
                            u64 *parent_pte)
 {
-       mmu_page_remove_parent_pte(sp, parent_pte);
+       mmu_page_remove_parent_pte(kvm, sp, parent_pte);
        mmu_spte_clear_no_track(parent_pte);
 }
 
@@ -1840,7 +1819,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
 static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
 {
        --sp->unsync_children;
-       WARN_ON((int)sp->unsync_children < 0);
+       WARN_ON_ONCE((int)sp->unsync_children < 0);
        __clear_bit(idx, sp->unsync_child_bitmap);
 }
 
@@ -1898,7 +1877,7 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-       WARN_ON(!sp->unsync);
+       WARN_ON_ONCE(!sp->unsync);
        trace_kvm_mmu_sync_page(sp);
        sp->unsync = 0;
        --kvm->stat.mmu_unsync;
@@ -2073,11 +2052,11 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec,
        if (pvec->nr == 0)
                return 0;
 
-       WARN_ON(pvec->page[0].idx != INVALID_INDEX);
+       WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX);
 
        sp = pvec->page[0].sp;
        level = sp->role.level;
-       WARN_ON(level == PG_LEVEL_4K);
+       WARN_ON_ONCE(level == PG_LEVEL_4K);
 
        parents->parent[level-2] = sp;
 
@@ -2099,7 +2078,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
                if (!sp)
                        return;
 
-               WARN_ON(idx == INVALID_INDEX);
+               WARN_ON_ONCE(idx == INVALID_INDEX);
                clear_unsync_child_bit(sp, idx);
                level++;
        } while (!sp->unsync_children);
@@ -2220,7 +2199,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
                        if (ret < 0)
                                break;
 
-                       WARN_ON(!list_empty(&invalid_list));
+                       WARN_ON_ONCE(!list_empty(&invalid_list));
                        if (ret > 0)
                                kvm_flush_remote_tlbs(kvm);
                }
@@ -2499,7 +2478,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                if (child->role.access == direct_access)
                        return;
 
-               drop_parent_pte(child, sptep);
+               drop_parent_pte(vcpu->kvm, child, sptep);
                kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
        }
 }
@@ -2517,7 +2496,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
                        drop_spte(kvm, spte);
                } else {
                        child = spte_to_child_sp(pte);
-                       drop_parent_pte(child, spte);
+                       drop_parent_pte(kvm, child, spte);
 
                        /*
                         * Recursively zap nested TDP SPs, parentless SPs are
@@ -2548,13 +2527,13 @@ static int kvm_mmu_page_unlink_children(struct kvm *kvm,
        return zapped;
 }
 
-static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
+static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
        u64 *sptep;
        struct rmap_iterator iter;
 
        while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
-               drop_parent_pte(sp, sptep);
+               drop_parent_pte(kvm, sp, sptep);
 }
 
 static int mmu_zap_unsync_children(struct kvm *kvm,
@@ -2593,7 +2572,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
        ++kvm->stat.mmu_shadow_zapped;
        *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
        *nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
-       kvm_mmu_unlink_parents(sp);
+       kvm_mmu_unlink_parents(kvm, sp);
 
        /* Zapping children means active_mmu_pages has become unstable. */
        list_unstable = *nr_zapped;
@@ -2675,7 +2654,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
        kvm_flush_remote_tlbs(kvm);
 
        list_for_each_entry_safe(sp, nsp, invalid_list, link) {
-               WARN_ON(!sp->role.invalid || sp->root_count);
+               WARN_ON_ONCE(!sp->role.invalid || sp->root_count);
                kvm_mmu_free_shadow_page(sp);
        }
 }
@@ -2775,12 +2754,9 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
        LIST_HEAD(invalid_list);
        int r;
 
-       pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
        r = 0;
        write_lock(&kvm->mmu_lock);
        for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) {
-               pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
-                        sp->role.word);
                r = 1;
                kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
        }
@@ -2831,7 +2807,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
         * track machinery is used to write-protect upper-level shadow pages,
         * i.e. this guards the role.level == 4K assertion below!
         */
-       if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE))
+       if (kvm_gfn_is_write_tracked(kvm, slot, gfn))
                return -EPERM;
 
        /*
@@ -2873,7 +2849,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
                                continue;
                }
 
-               WARN_ON(sp->role.level != PG_LEVEL_4K);
+               WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
                kvm_unsync_page(kvm, sp);
        }
        if (locked)
@@ -2938,9 +2914,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
        bool prefetch = !fault || fault->prefetch;
        bool write_fault = fault && fault->write;
 
-       pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
-                *sptep, write_fault, gfn);
-
        if (unlikely(is_noslot_pfn(pfn))) {
                vcpu->stat.pf_mmio_spte_created++;
                mark_mmio_spte(vcpu, sptep, gfn, pte_access);
@@ -2957,11 +2930,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
                        u64 pte = *sptep;
 
                        child = spte_to_child_sp(pte);
-                       drop_parent_pte(child, sptep);
+                       drop_parent_pte(vcpu->kvm, child, sptep);
                        flush = true;
                } else if (pfn != spte_to_pfn(*sptep)) {
-                       pgprintk("hfn old %llx new %llx\n",
-                                spte_to_pfn(*sptep), pfn);
                        drop_spte(vcpu->kvm, sptep);
                        flush = true;
                } else
@@ -2986,8 +2957,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
        if (flush)
                kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
 
-       pgprintk("%s: setting spte %llx\n", __func__, *sptep);
-
        if (!was_rmapped) {
                WARN_ON_ONCE(ret == RET_PF_SPURIOUS);
                rmap_add(vcpu, slot, sptep, gfn, pte_access);
@@ -3033,7 +3002,7 @@ static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
        u64 *spte, *start = NULL;
        int i;
 
-       WARN_ON(!sp->role.direct);
+       WARN_ON_ONCE(!sp->role.direct);
 
        i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1);
        spte = sp->spt + i;
@@ -3574,12 +3543,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
        if (!VALID_PAGE(*root_hpa))
                return;
 
-       /*
-        * The "root" may be a special root, e.g. a PAE entry, treat it as a
-        * SPTE to ensure any non-PA bits are dropped.
-        */
-       sp = spte_to_child_sp(*root_hpa);
-       if (WARN_ON(!sp))
+       sp = root_to_sp(*root_hpa);
+       if (WARN_ON_ONCE(!sp))
                return;
 
        if (is_tdp_mmu_page(sp))
@@ -3624,7 +3589,9 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
                                           &invalid_list);
 
        if (free_active_root) {
-               if (to_shadow_page(mmu->root.hpa)) {
+               if (kvm_mmu_is_dummy_root(mmu->root.hpa)) {
+                       /* Nothing to cleanup for dummy roots. */
+               } else if (root_to_sp(mmu->root.hpa)) {
                        mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list);
                } else if (mmu->pae_root) {
                        for (i = 0; i < 4; ++i) {
@@ -3648,6 +3615,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
 void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
 {
        unsigned long roots_to_free = 0;
+       struct kvm_mmu_page *sp;
        hpa_t root_hpa;
        int i;
 
@@ -3662,8 +3630,8 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
                if (!VALID_PAGE(root_hpa))
                        continue;
 
-               if (!to_shadow_page(root_hpa) ||
-                       to_shadow_page(root_hpa)->role.guest_mode)
+               sp = root_to_sp(root_hpa);
+               if (!sp || sp->role.guest_mode)
                        roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
        }
 
@@ -3671,19 +3639,6 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots);
 
-
-static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
-{
-       int ret = 0;
-
-       if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
-               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-               ret = 1;
-       }
-
-       return ret;
-}
-
 static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant,
                            u8 level)
 {
@@ -3821,8 +3776,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
        root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
        root_gfn = root_pgd >> PAGE_SHIFT;
 
-       if (mmu_check_root(vcpu, root_gfn))
-               return 1;
+       if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
+               mmu->root.hpa = kvm_mmu_get_dummy_root();
+               return 0;
+       }
 
        /*
         * On SVM, reading PDPTRs might access guest memory, which might fault
@@ -3834,8 +3791,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                        if (!(pdptrs[i] & PT_PRESENT_MASK))
                                continue;
 
-                       if (mmu_check_root(vcpu, pdptrs[i] >> PAGE_SHIFT))
-                               return 1;
+                       if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT))
+                               pdptrs[i] = 0;
                }
        }
 
@@ -4002,7 +3959,7 @@ static bool is_unsync_root(hpa_t root)
 {
        struct kvm_mmu_page *sp;
 
-       if (!VALID_PAGE(root))
+       if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root))
                return false;
 
        /*
@@ -4018,7 +3975,7 @@ static bool is_unsync_root(hpa_t root)
         * requirement isn't satisfied.
         */
        smp_rmb();
-       sp = to_shadow_page(root);
+       sp = root_to_sp(root);
 
        /*
         * PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the
@@ -4048,11 +4005,12 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 
        if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) {
                hpa_t root = vcpu->arch.mmu->root.hpa;
-               sp = to_shadow_page(root);
 
                if (!is_unsync_root(root))
                        return;
 
+               sp = root_to_sp(root);
+
                write_lock(&vcpu->kvm->mmu_lock);
                mmu_sync_children(vcpu, sp, true);
                write_unlock(&vcpu->kvm->mmu_lock);
@@ -4194,7 +4152,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
                return RET_PF_EMULATE;
 
        reserved = get_mmio_spte(vcpu, addr, &spte);
-       if (WARN_ON(reserved))
+       if (WARN_ON_ONCE(reserved))
                return -EINVAL;
 
        if (is_mmio_spte(spte)) {
@@ -4232,7 +4190,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
         * guest is writing the page which is write tracked which can
         * not be fixed by page fault handler.
         */
-       if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
+       if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn))
                return true;
 
        return false;
@@ -4382,7 +4340,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
                                struct kvm_page_fault *fault)
 {
-       struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
+       struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
 
        /* Special roots, e.g. pae_root, are not backed by shadow pages. */
        if (sp && is_obsolete_sp(vcpu->kvm, sp))
@@ -4407,6 +4365,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 {
        int r;
 
+       /* Dummy roots are used only for shadowing bad guest roots. */
+       if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa)))
+               return RET_PF_RETRY;
+
        if (page_fault_handle_page_track(vcpu, fault))
                return RET_PF_EMULATE;
 
@@ -4443,8 +4405,6 @@ out_unlock:
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu,
                                struct kvm_page_fault *fault)
 {
-       pgprintk("%s: gva %lx error %x\n", __func__, fault->addr, fault->error_code);
-
        /* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
        fault->max_level = PG_LEVEL_2M;
        return direct_page_fault(vcpu, fault);
@@ -4562,9 +4522,19 @@ static void nonpaging_init_context(struct kvm_mmu *context)
 static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
                                  union kvm_mmu_page_role role)
 {
-       return (role.direct || pgd == root->pgd) &&
-              VALID_PAGE(root->hpa) &&
-              role.word == to_shadow_page(root->hpa)->role.word;
+       struct kvm_mmu_page *sp;
+
+       if (!VALID_PAGE(root->hpa))
+               return false;
+
+       if (!role.direct && pgd != root->pgd)
+               return false;
+
+       sp = root_to_sp(root->hpa);
+       if (WARN_ON_ONCE(!sp))
+               return false;
+
+       return role.word == sp->role.word;
 }
 
 /*
@@ -4634,11 +4604,10 @@ static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu,
                            gpa_t new_pgd, union kvm_mmu_page_role new_role)
 {
        /*
-        * For now, limit the caching to 64-bit hosts+VMs in order to avoid
-        * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
-        * later if necessary.
+        * Limit reuse to 64-bit hosts+VMs without "special" roots in order to
+        * avoid having to deal with PDPTEs and other complexities.
         */
-       if (VALID_PAGE(mmu->root.hpa) && !to_shadow_page(mmu->root.hpa))
+       if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa))
                kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
 
        if (VALID_PAGE(mmu->root.hpa))
@@ -4684,9 +4653,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
         * If this is a direct root page, it doesn't have a write flooding
         * count. Otherwise, clear the write flooding count.
         */
-       if (!new_role.direct)
-               __clear_sp_write_flooding_count(
-                               to_shadow_page(vcpu->arch.mmu->root.hpa));
+       if (!new_role.direct) {
+               struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
+
+               if (!WARN_ON_ONCE(!sp))
+                       __clear_sp_write_flooding_count(sp);
+       }
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
 
@@ -4808,28 +4780,13 @@ static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
        }
 }
 
-static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu)
-{
-       /*
-        * If TDP is enabled, let the guest use GBPAGES if they're supported in
-        * hardware.  The hardware page walker doesn't let KVM disable GBPAGES,
-        * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
-        * walk for performance and complexity reasons.  Not to mention KVM
-        * _can't_ solve the problem because GVA->GPA walks aren't visible to
-        * KVM once a TDP translation is installed.  Mimic hardware behavior so
-        * that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
-        */
-       return tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
-                            guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
-}
-
 static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
                                        struct kvm_mmu *context)
 {
        __reset_rsvds_bits_mask(&context->guest_rsvd_check,
                                vcpu->arch.reserved_gpa_bits,
                                context->cpu_role.base.level, is_efer_nx(context),
-                               guest_can_use_gbpages(vcpu),
+                               guest_can_use(vcpu, X86_FEATURE_GBPAGES),
                                is_cr4_pse(context),
                                guest_cpuid_is_amd_or_hygon(vcpu));
 }
@@ -4906,7 +4863,8 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
        __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
                                context->root_role.level,
                                context->root_role.efer_nx,
-                               guest_can_use_gbpages(vcpu), is_pse, is_amd);
+                               guest_can_use(vcpu, X86_FEATURE_GBPAGES),
+                               is_pse, is_amd);
 
        if (!shadow_me_mask)
                return;
@@ -5467,8 +5425,8 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
         * physical address properties) in a single VM would require tracking
         * all relevant CPUID information in kvm_mmu_page_role. That is very
         * undesirable as it would increase the memory requirements for
-        * gfn_track (see struct kvm_mmu_page_role comments).  For now that
-        * problem is swept under the rug; KVM's CPUID API is horrific and
+        * gfn_write_track (see struct kvm_mmu_page_role comments).  For now
+        * that problem is swept under the rug; KVM's CPUID API is horrific and
         * it's all but impossible to solve it without introducing a new API.
         */
        vcpu->arch.root_mmu.root_role.word = 0;
@@ -5531,9 +5489,9 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
        struct kvm *kvm = vcpu->kvm;
 
        kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
-       WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
+       WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
        kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
-       WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
+       WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
        vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 }
 
@@ -5546,16 +5504,21 @@ static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa)
 
        /*
         * When freeing obsolete roots, treat roots as obsolete if they don't
-        * have an associated shadow page.  This does mean KVM will get false
+        * have an associated shadow page, as it's impossible to determine if
+        * such roots are fresh or stale.  This does mean KVM will get false
         * positives and free roots that don't strictly need to be freed, but
         * such false positives are relatively rare:
         *
-        *  (a) only PAE paging and nested NPT has roots without shadow pages
+        *  (a) only PAE paging and nested NPT have roots without shadow pages
+        *      (or any shadow paging flavor with a dummy root, see note below)
         *  (b) remote reloads due to a memslot update obsoletes _all_ roots
         *  (c) KVM doesn't track previous roots for PAE paging, and the guest
         *      is unlikely to zap an in-use PGD.
+        *
+        * Note!  Dummy roots are unique in that they are obsoleted by memslot
+        * _creation_!  See also FNAME(fetch).
         */
-       sp = to_shadow_page(root_hpa);
+       sp = root_to_sp(root_hpa);
        return !sp || is_obsolete_sp(kvm, sp);
 }
 
@@ -5634,9 +5597,6 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
 {
        unsigned offset, pte_size, misaligned;
 
-       pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-                gpa, bytes, sp->role.word);
-
        offset = offset_in_page(gpa);
        pte_size = sp->role.has_4_byte_gpte ? 4 : 8;
 
@@ -5684,9 +5644,8 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
        return spte;
 }
 
-static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-                             const u8 *new, int bytes,
-                             struct kvm_page_track_notifier_node *node)
+void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+                        int bytes)
 {
        gfn_t gfn = gpa >> PAGE_SHIFT;
        struct kvm_mmu_page *sp;
@@ -5702,8 +5661,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
                return;
 
-       pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
-
        write_lock(&vcpu->kvm->mmu_lock);
 
        gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
@@ -5742,7 +5699,18 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
        int r, emulation_type = EMULTYPE_PF;
        bool direct = vcpu->arch.mmu->root_role.direct;
 
-       if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+       /*
+        * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
+        * checks when emulating instructions that trigger implicit accesses.
+        * WARN if hardware generates a fault with an error code that collides
+        * with the KVM-defined value.  Clear the flag and continue on, i.e.
+        * don't terminate the VM, as KVM can't possibly be relying on a flag
+        * that KVM doesn't know about.
+        */
+       if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
+               error_code &= ~PFERR_IMPLICIT_ACCESS;
+
+       if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
                return RET_PF_RETRY;
 
        r = RET_PF_INVALID;
@@ -6099,7 +6067,7 @@ restart:
                 * pages.  Skip the bogus page, otherwise we'll get stuck in an
                 * infinite loop if the page gets put back on the list (again).
                 */
-               if (WARN_ON(sp->role.invalid))
+               if (WARN_ON_ONCE(sp->role.invalid))
                        continue;
 
                /*
@@ -6199,16 +6167,8 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
        return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
 }
 
-static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
-                       struct kvm_memory_slot *slot,
-                       struct kvm_page_track_notifier_node *node)
-{
-       kvm_mmu_zap_all_fast(kvm);
-}
-
 int kvm_mmu_init_vm(struct kvm *kvm)
 {
-       struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
        int r;
 
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
@@ -6222,10 +6182,6 @@ int kvm_mmu_init_vm(struct kvm *kvm)
                        return r;
        }
 
-       node->track_write = kvm_mmu_pte_write;
-       node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
-       kvm_page_track_register_notifier(kvm, node);
-
        kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
        kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
 
@@ -6246,10 +6202,6 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
 {
-       struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-
-       kvm_page_track_unregister_notifier(kvm, node);
-
        if (tdp_mmu_enabled)
                kvm_mmu_uninit_tdp_mmu(kvm);
 
@@ -6670,7 +6622,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
         */
        if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
                            PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
-               kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+               kvm_flush_remote_tlbs_memslot(kvm, slot);
 }
 
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
@@ -6689,20 +6641,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
        }
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       /*
-        * All current use cases for flushing the TLBs for a specific memslot
-        * related to dirty logging, and many do the TLB flush out of mmu_lock.
-        * The interaction between the various operations on memslot must be
-        * serialized by slots_locks to ensure the TLB flush from one operation
-        * is observed by any other operation on the same memslot.
-        */
-       lockdep_assert_held(&kvm->slots_lock);
-       kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
-}
-
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot)
 {
@@ -6732,7 +6670,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
         */
 }
 
-void kvm_mmu_zap_all(struct kvm *kvm)
+static void kvm_mmu_zap_all(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
        LIST_HEAD(invalid_list);
@@ -6741,7 +6679,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
        write_lock(&kvm->mmu_lock);
 restart:
        list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
-               if (WARN_ON(sp->role.invalid))
+               if (WARN_ON_ONCE(sp->role.invalid))
                        continue;
                if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
                        goto restart;
@@ -6757,9 +6695,20 @@ restart:
        write_unlock(&kvm->mmu_lock);
 }
 
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+       kvm_mmu_zap_all(kvm);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+                                  struct kvm_memory_slot *slot)
+{
+       kvm_mmu_zap_all_fast(kvm);
+}
+
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
-       WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+       WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
 
        gen &= MMIO_SPTE_GEN_MASK;
 
@@ -6862,7 +6811,7 @@ static void mmu_destroy_caches(void)
 static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
 {
        if (nx_hugepage_mitigation_hard_disabled)
-               return sprintf(buffer, "never\n");
+               return sysfs_emit(buffer, "never\n");
 
        return param_get_bool(buffer, kp);
 }
index d39af56..b102014 100644 (file)
@@ -6,18 +6,10 @@
 #include <linux/kvm_host.h>
 #include <asm/kvm_host.h>
 
-#undef MMU_DEBUG
-
-#ifdef MMU_DEBUG
-extern bool dbg;
-
-#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
-#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0)
-#define MMU_WARN_ON(x) WARN_ON(x)
+#ifdef CONFIG_KVM_PROVE_MMU
+#define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x)
 #else
-#define pgprintk(x...) do { } while (0)
-#define rmap_printk(x...) do { } while (0)
-#define MMU_WARN_ON(x) do { } while (0)
+#define KVM_MMU_WARN_ON(x) BUILD_BUG_ON_INVALID(x)
 #endif
 
 /* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
@@ -44,6 +36,16 @@ extern bool dbg;
 #define INVALID_PAE_ROOT       0
 #define IS_VALID_PAE_ROOT(x)   (!!(x))
 
+static inline hpa_t kvm_mmu_get_dummy_root(void)
+{
+       return my_zero_pfn(0) << PAGE_SHIFT;
+}
+
+static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
+{
+       return is_zero_pfn(shadow_page >> PAGE_SHIFT);
+}
+
 typedef u64 __rcu *tdp_ptep_t;
 
 struct kvm_mmu_page {
@@ -170,9 +172,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
                                    struct kvm_memory_slot *slot, u64 gfn,
                                    int min_level);
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-                                gfn_t nr_pages);
-
 /* Flush the given page (huge or not) of guest memory. */
 static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
 {
index 0a2ac43..c87da11 100644 (file)
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/lockdep.h>
 #include <linux/kvm_host.h>
 #include <linux/rculist.h>
 
-#include <asm/kvm_page_track.h>
-
 #include "mmu.h"
 #include "mmu_internal.h"
+#include "page_track.h"
 
 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
 {
@@ -28,103 +28,64 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
 {
-       int i;
-
-       for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
-               kvfree(slot->arch.gfn_track[i]);
-               slot->arch.gfn_track[i] = NULL;
-       }
+       kvfree(slot->arch.gfn_write_track);
+       slot->arch.gfn_write_track = NULL;
 }
 
-int kvm_page_track_create_memslot(struct kvm *kvm,
-                                 struct kvm_memory_slot *slot,
-                                 unsigned long npages)
+static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
+                                                unsigned long npages)
 {
-       int i;
-
-       for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
-               if (i == KVM_PAGE_TRACK_WRITE &&
-                   !kvm_page_track_write_tracking_enabled(kvm))
-                       continue;
-
-               slot->arch.gfn_track[i] =
-                       __vcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
-                                 GFP_KERNEL_ACCOUNT);
-               if (!slot->arch.gfn_track[i])
-                       goto track_free;
-       }
+       const size_t size = sizeof(*slot->arch.gfn_write_track);
 
-       return 0;
+       if (!slot->arch.gfn_write_track)
+               slot->arch.gfn_write_track = __vcalloc(npages, size,
+                                                      GFP_KERNEL_ACCOUNT);
 
-track_free:
-       kvm_page_track_free_memslot(slot);
-       return -ENOMEM;
+       return slot->arch.gfn_write_track ? 0 : -ENOMEM;
 }
 
-static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
+int kvm_page_track_create_memslot(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot,
+                                 unsigned long npages)
 {
-       if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
-               return false;
+       if (!kvm_page_track_write_tracking_enabled(kvm))
+               return 0;
 
-       return true;
+       return __kvm_page_track_write_tracking_alloc(slot, npages);
 }
 
 int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
 {
-       unsigned short *gfn_track;
-
-       if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE])
-               return 0;
-
-       gfn_track = __vcalloc(slot->npages, sizeof(*gfn_track),
-                             GFP_KERNEL_ACCOUNT);
-       if (gfn_track == NULL)
-               return -ENOMEM;
-
-       slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track;
-       return 0;
+       return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
 }
 
-static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
-                            enum kvm_page_track_mode mode, short count)
+static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
+                                  short count)
 {
        int index, val;
 
        index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
 
-       val = slot->arch.gfn_track[mode][index];
+       val = slot->arch.gfn_write_track[index];
 
-       if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
+       if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
                return;
 
-       slot->arch.gfn_track[mode][index] += count;
+       slot->arch.gfn_write_track[index] += count;
 }
 
-/*
- * add guest page to the tracking pool so that corresponding access on that
- * page will be intercepted.
- *
- * It should be called under the protection both of mmu-lock and kvm->srcu
- * or kvm->slots_lock.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
- */
-void kvm_slot_page_track_add_page(struct kvm *kvm,
-                                 struct kvm_memory_slot *slot, gfn_t gfn,
-                                 enum kvm_page_track_mode mode)
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+                              gfn_t gfn)
 {
+       lockdep_assert_held_write(&kvm->mmu_lock);
 
-       if (WARN_ON(!page_track_mode_is_valid(mode)))
-               return;
+       lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
+                           srcu_read_lock_held(&kvm->srcu));
 
-       if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
-                   !kvm_page_track_write_tracking_enabled(kvm)))
+       if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
                return;
 
-       update_gfn_track(slot, gfn, mode, 1);
+       update_gfn_write_track(slot, gfn, 1);
 
        /*
         * new track stops large page mapping for the
@@ -132,37 +93,22 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
         */
        kvm_mmu_gfn_disallow_lpage(slot, gfn);
 
-       if (mode == KVM_PAGE_TRACK_WRITE)
-               if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
-                       kvm_flush_remote_tlbs(kvm);
+       if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
+               kvm_flush_remote_tlbs(kvm);
 }
-EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
 
-/*
- * remove the guest page from the tracking pool which stops the interception
- * of corresponding access on that page. It is the opposed operation of
- * kvm_slot_page_track_add_page().
- *
- * It should be called under the protection both of mmu-lock and kvm->srcu
- * or kvm->slots_lock.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
- */
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
-                                    struct kvm_memory_slot *slot, gfn_t gfn,
-                                    enum kvm_page_track_mode mode)
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot, gfn_t gfn)
 {
-       if (WARN_ON(!page_track_mode_is_valid(mode)))
-               return;
+       lockdep_assert_held_write(&kvm->mmu_lock);
 
-       if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
-                   !kvm_page_track_write_tracking_enabled(kvm)))
+       lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
+                           srcu_read_lock_held(&kvm->srcu));
+
+       if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
                return;
 
-       update_gfn_track(slot, gfn, mode, -1);
+       update_gfn_write_track(slot, gfn, -1);
 
        /*
         * allow large page mapping for the tracked page
@@ -170,31 +116,26 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
         */
        kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
-EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
 
 /*
  * check if write access to the specified guest page is tracked.
  */
-bool kvm_slot_page_track_is_active(struct kvm *kvm,
-                                  const struct kvm_memory_slot *slot,
-                                  gfn_t gfn, enum kvm_page_track_mode mode)
+bool kvm_gfn_is_write_tracked(struct kvm *kvm,
+                             const struct kvm_memory_slot *slot, gfn_t gfn)
 {
        int index;
 
-       if (WARN_ON(!page_track_mode_is_valid(mode)))
-               return false;
-
        if (!slot)
                return false;
 
-       if (mode == KVM_PAGE_TRACK_WRITE &&
-           !kvm_page_track_write_tracking_enabled(kvm))
+       if (!kvm_page_track_write_tracking_enabled(kvm))
                return false;
 
        index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
-       return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
+       return !!READ_ONCE(slot->arch.gfn_write_track[index]);
 }
 
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 void kvm_page_track_cleanup(struct kvm *kvm)
 {
        struct kvm_page_track_notifier_head *head;
@@ -216,17 +157,22 @@ int kvm_page_track_init(struct kvm *kvm)
  * register the notifier so that event interception for the tracked guest
  * pages can be received.
  */
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
-                                struct kvm_page_track_notifier_node *n)
+int kvm_page_track_register_notifier(struct kvm *kvm,
+                                    struct kvm_page_track_notifier_node *n)
 {
        struct kvm_page_track_notifier_head *head;
 
+       if (!kvm || kvm->mm != current->mm)
+               return -ESRCH;
+
+       kvm_get_kvm(kvm);
+
        head = &kvm->arch.track_notifier_head;
 
        write_lock(&kvm->mmu_lock);
        hlist_add_head_rcu(&n->node, &head->track_notifier_list);
        write_unlock(&kvm->mmu_lock);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
 
@@ -234,9 +180,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
  * stop receiving the event interception. It is the opposite operation of
  * kvm_page_track_register_notifier().
  */
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
-                                  struct kvm_page_track_notifier_node *n)
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+                                       struct kvm_page_track_notifier_node *n)
 {
        struct kvm_page_track_notifier_head *head;
 
@@ -246,6 +191,8 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
        hlist_del_rcu(&n->node);
        write_unlock(&kvm->mmu_lock);
        synchronize_srcu(&head->track_srcu);
+
+       kvm_put_kvm(kvm);
 }
 EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
 
@@ -256,34 +203,30 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
  * The node should figure out if the written page is the one that node is
  * interested in by itself.
  */
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-                         int bytes)
+void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
 {
        struct kvm_page_track_notifier_head *head;
        struct kvm_page_track_notifier_node *n;
        int idx;
 
-       head = &vcpu->kvm->arch.track_notifier_head;
+       head = &kvm->arch.track_notifier_head;
 
        if (hlist_empty(&head->track_notifier_list))
                return;
 
        idx = srcu_read_lock(&head->track_srcu);
        hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
-                               srcu_read_lock_held(&head->track_srcu))
+                                 srcu_read_lock_held(&head->track_srcu))
                if (n->track_write)
-                       n->track_write(vcpu, gpa, new, bytes, n);
+                       n->track_write(gpa, new, bytes, n);
        srcu_read_unlock(&head->track_srcu, idx);
 }
 
 /*
- * Notify the node that memory slot is being removed or moved so that it can
- * drop write-protection for the pages in the memory slot.
- *
- * The node should figure out it has any write-protected pages in this slot
- * by itself.
+ * Notify external page track nodes that a memory region is being removed from
+ * the VM, e.g. so that users can free any associated metadata.
  */
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
        struct kvm_page_track_notifier_head *head;
        struct kvm_page_track_notifier_node *n;
@@ -296,8 +239,69 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 
        idx = srcu_read_lock(&head->track_srcu);
        hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
-                               srcu_read_lock_held(&head->track_srcu))
-               if (n->track_flush_slot)
-                       n->track_flush_slot(kvm, slot, n);
+                                 srcu_read_lock_held(&head->track_srcu))
+               if (n->track_remove_region)
+                       n->track_remove_region(slot->base_gfn, slot->npages, n);
        srcu_read_unlock(&head->track_srcu, idx);
 }
+
+/*
+ * Add a guest page to the write-tracking pool so that guest writes to that
+ * page will be intercepted.
+ *
+ * The memslot containing @gfn is looked up while kvm->srcu is held for read,
+ * and the tracking state is then updated by __kvm_write_track_add_gfn() with
+ * kvm->mmu_lock held for write.  Both locks are taken and released here, so
+ * the caller does not need to hold either of them.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ *
+ * Returns -EINVAL if no memslot contains @gfn, 0 otherwise.
+ */
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
+{
+       struct kvm_memory_slot *slot;
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+
+       slot = gfn_to_memslot(kvm, gfn);
+       if (!slot) {
+               srcu_read_unlock(&kvm->srcu, idx);
+               return -EINVAL;
+       }
+
+       write_lock(&kvm->mmu_lock);
+       __kvm_write_track_add_gfn(kvm, slot, gfn);
+       write_unlock(&kvm->mmu_lock);
+
+       srcu_read_unlock(&kvm->srcu, idx);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
+
+/*
+ * Remove a guest page from the write-tracking pool, which stops the
+ * interception of guest writes to that page.
+ *
+ * The memslot containing @gfn is looked up while kvm->srcu is held for read,
+ * and the tracking state is then updated by __kvm_write_track_remove_gfn()
+ * with kvm->mmu_lock held for write.  Both locks are taken and released here,
+ * so the caller does not need to hold either of them.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ *
+ * Returns -EINVAL if no memslot contains @gfn, 0 otherwise.
+ */
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
+{
+       struct kvm_memory_slot *slot;
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+
+       slot = gfn_to_memslot(kvm, gfn);
+       if (!slot) {
+               srcu_read_unlock(&kvm->srcu, idx);
+               return -EINVAL;
+       }
+
+       write_lock(&kvm->mmu_lock);
+       __kvm_write_track_remove_gfn(kvm, slot, gfn);
+       write_unlock(&kvm->mmu_lock);
+
+       srcu_read_unlock(&kvm->srcu, idx);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
+#endif
diff --git a/arch/x86/kvm/mmu/page_track.h b/arch/x86/kvm/mmu/page_track.h
new file mode 100644 (file)
index 0000000..d4d72ed
--- /dev/null
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_PAGE_TRACK_H
+#define __KVM_X86_PAGE_TRACK_H
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_page_track.h>
+
+
+bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
+int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
+
+void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
+int kvm_page_track_create_memslot(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot,
+                                 unsigned long npages);
+
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+                              gfn_t gfn);
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot, gfn_t gfn);
+
+bool kvm_gfn_is_write_tracked(struct kvm *kvm,
+                             const struct kvm_memory_slot *slot, gfn_t gfn);
+
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+int kvm_page_track_init(struct kvm *kvm);
+void kvm_page_track_cleanup(struct kvm *kvm);
+
+void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes);
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
+
+static inline bool kvm_page_track_has_external_user(struct kvm *kvm)
+{
+       return !hlist_empty(&kvm->arch.track_notifier_head.track_notifier_list);
+}
+#else
+static inline int kvm_page_track_init(struct kvm *kvm) { return 0; }
+static inline void kvm_page_track_cleanup(struct kvm *kvm) { }
+
+static inline void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa,
+                                         const u8 *new, int bytes) { }
+static inline void kvm_page_track_delete_slot(struct kvm *kvm,
+                                             struct kvm_memory_slot *slot) { }
+
+static inline bool kvm_page_track_has_external_user(struct kvm *kvm) { return false; }
+
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
+
+static inline void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+                                       const u8 *new, int bytes)
+{
+       __kvm_page_track_write(vcpu->kvm, gpa, new, bytes);
+
+       kvm_mmu_track_write(vcpu, gpa, new, bytes);
+}
+
+#endif /* __KVM_X86_PAGE_TRACK_H */
index 0662e02..c852550 100644 (file)
@@ -338,7 +338,6 @@ retry_walk:
        }
 #endif
        walker->max_level = walker->level;
-       ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
        /*
         * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
@@ -348,9 +347,21 @@ retry_walk:
        nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
 
        pte_access = ~0;
+
+       /*
+        * Queue a page fault for injection if this assertion fails, as callers
+        * assume that walker.fault contains sane info on a walk failure.  I.e.
+        * avoid making the situation worse by inducing even worse badness
+        * between when the assertion fails and when KVM kicks the vCPU out to
+        * userspace (because the VM is bugged).
+        */
+       if (KVM_BUG_ON(is_long_mode(vcpu) && !is_pae(vcpu), vcpu->kvm))
+               goto error;
+
        ++walker->level;
 
        do {
+               struct kvm_memory_slot *slot;
                unsigned long host_addr;
 
                pt_access = pte_access;
@@ -381,7 +392,11 @@ retry_walk:
                if (unlikely(real_gpa == INVALID_GPA))
                        return 0;
 
-               host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa),
+               slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(real_gpa));
+               if (!kvm_is_visible_memslot(slot))
+                       goto error;
+
+               host_addr = gfn_to_hva_memslot_prot(slot, gpa_to_gfn(real_gpa),
                                            &walker->pte_writable[walker->level - 1]);
                if (unlikely(kvm_is_error_hva(host_addr)))
                        goto error;
@@ -456,9 +471,6 @@ retry_walk:
                        goto retry_walk;
        }
 
-       pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-                __func__, (u64)pte, walker->pte_access,
-                walker->pt_access[walker->level - 1]);
        return 1;
 
 error:
@@ -529,8 +541,6 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
        if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
                return false;
 
-       pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-
        gfn = gpte_to_gfn(gpte);
        pte_access = sp->role.access & FNAME(gpte_access)(gpte);
        FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
@@ -638,8 +648,19 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
        if (FNAME(gpte_changed)(vcpu, gw, top_level))
                goto out_gpte_changed;
 
-       if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+       if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+               goto out_gpte_changed;
+
+       /*
+        * Load a new root and retry the faulting instruction in the extremely
+        * unlikely scenario that the guest root gfn became visible between
+        * loading a dummy root and handling the resulting page fault, e.g. if
+        * userspace creates a memslot in the interim.
+        */
+       if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) {
+               kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu);
                goto out_gpte_changed;
+       }
 
        for_each_shadow_entry(vcpu, fault->addr, it) {
                gfn_t table_gfn;
@@ -758,7 +779,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        struct guest_walker walker;
        int r;
 
-       pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
        WARN_ON_ONCE(fault->is_tdp);
 
        /*
@@ -773,7 +793,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
         * The page is not mapped by the guest.  Let the guest handle it.
         */
        if (!r) {
-               pgprintk("%s: guest page fault\n", __func__);
                if (!fault->prefetch)
                        kvm_inject_emulated_page_fault(vcpu, &walker.fault);
 
@@ -837,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
 {
        int offset = 0;
 
-       WARN_ON(sp->role.level != PG_LEVEL_4K);
+       WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
 
        if (PTTYPE == 32)
                offset = sp->role.quadrant << SPTE_LEVEL_BITS;
index cf2c642..4a59913 100644 (file)
@@ -61,7 +61,7 @@ static u64 generation_mmio_spte_mask(u64 gen)
 {
        u64 mask;
 
-       WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+       WARN_ON_ONCE(gen & ~MMIO_SPTE_GEN_MASK);
 
        mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
        mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
@@ -221,8 +221,6 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                 * shadow pages and unsync'ing pages is not allowed.
                 */
                if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) {
-                       pgprintk("%s: found shadow page for %llx, marking ro\n",
-                                __func__, gfn);
                        wrprot = true;
                        pte_access &= ~ACC_WRITE_MASK;
                        spte &= ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
@@ -242,7 +240,7 @@ out:
 
        if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
                /* Enforced by kvm_mmu_hugepage_adjust. */
-               WARN_ON(level > PG_LEVEL_4K);
+               WARN_ON_ONCE(level > PG_LEVEL_4K);
                mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
        }
 
index 1279db2..a129951 100644 (file)
@@ -3,6 +3,7 @@
 #ifndef KVM_X86_MMU_SPTE_H
 #define KVM_X86_MMU_SPTE_H
 
+#include "mmu.h"
 #include "mmu_internal.h"
 
 /*
@@ -236,6 +237,18 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
        return to_shadow_page(__pa(sptep));
 }
 
+static inline struct kvm_mmu_page *root_to_sp(hpa_t root)
+{
+       if (kvm_mmu_is_dummy_root(root))
+               return NULL;
+
+       /*
+        * The "root" may be a special root, e.g. a PAE entry, treat it as a
+        * SPTE to ensure any non-PA bits are dropped.
+        */
+       return spte_to_child_sp(root);
+}
+
 static inline bool is_mmio_spte(u64 spte)
 {
        return (spte & shadow_mmio_mask) == shadow_mmio_value &&
@@ -265,13 +278,13 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
 
 static inline bool spte_ad_enabled(u64 spte)
 {
-       MMU_WARN_ON(!is_shadow_present_pte(spte));
+       KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
        return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED;
 }
 
 static inline bool spte_ad_need_write_protect(u64 spte)
 {
-       MMU_WARN_ON(!is_shadow_present_pte(spte));
+       KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
        /*
         * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0',
         * and non-TDP SPTEs will never set these bits.  Optimize for 64-bit
@@ -282,13 +295,13 @@ static inline bool spte_ad_need_write_protect(u64 spte)
 
 static inline u64 spte_shadow_accessed_mask(u64 spte)
 {
-       MMU_WARN_ON(!is_shadow_present_pte(spte));
+       KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
        return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
 }
 
 static inline u64 spte_shadow_dirty_mask(u64 spte)
 {
-       MMU_WARN_ON(!is_shadow_present_pte(spte));
+       KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
        return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
 }
 
index d2eb0d4..bd30ebf 100644 (file)
@@ -39,13 +39,14 @@ void tdp_iter_restart(struct tdp_iter *iter)
 void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
                    int min_level, gfn_t next_last_level_gfn)
 {
-       int root_level = root->role.level;
-
-       WARN_ON(root_level < 1);
-       WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
+       if (WARN_ON_ONCE(!root || (root->role.level < 1) ||
+                        (root->role.level > PT64_ROOT_MAX_LEVEL))) {
+               iter->valid = false;
+               return;
+       }
 
        iter->next_last_level_gfn = next_last_level_gfn;
-       iter->root_level = root_level;
+       iter->root_level = root->role.level;
        iter->min_level = min_level;
        iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
        iter->as_id = kvm_mmu_page_as_id(root);
index 512163d..6c63f2d 100644 (file)
@@ -475,9 +475,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
        bool is_leaf = is_present && is_last_spte(new_spte, level);
        bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
 
-       WARN_ON(level > PT64_ROOT_MAX_LEVEL);
-       WARN_ON(level < PG_LEVEL_4K);
-       WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+       WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL);
+       WARN_ON_ONCE(level < PG_LEVEL_4K);
+       WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
 
        /*
         * If this warning were to trigger it would indicate that there was a
@@ -522,9 +522,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
                 * impact the guest since both the former and current SPTEs
                 * are nonpresent.
                 */
-               if (WARN_ON(!is_mmio_spte(old_spte) &&
-                           !is_mmio_spte(new_spte) &&
-                           !is_removed_spte(new_spte)))
+               if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
+                                !is_mmio_spte(new_spte) &&
+                                !is_removed_spte(new_spte)))
                        pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
                               "should not be replaced with another,\n"
                               "different nonpresent SPTE, unless one or both\n"
@@ -661,7 +661,7 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
         * should be used. If operating under the MMU lock in write mode, the
         * use of the removed SPTE should not be necessary.
         */
-       WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte));
+       WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte));
 
        old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
 
@@ -689,7 +689,7 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
                else
 
 #define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)                \
-       for_each_tdp_pte(_iter, to_shadow_page(_mmu->root.hpa), _start, _end)
+       for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)
 
 /*
  * Yield if the MMU lock is contended or this thread needs to return control
@@ -709,7 +709,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
                                                          struct tdp_iter *iter,
                                                          bool flush, bool shared)
 {
-       WARN_ON(iter->yielded);
+       WARN_ON_ONCE(iter->yielded);
 
        /* Ensure forward progress has been made before yielding. */
        if (iter->next_last_level_gfn == iter->yielded_gfn)
@@ -728,7 +728,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
 
                rcu_read_lock();
 
-               WARN_ON(iter->gfn > iter->next_last_level_gfn);
+               WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);
 
                iter->yielded = true;
        }
@@ -1241,7 +1241,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
        u64 new_spte;
 
        /* Huge pages aren't expected to be modified without first being zapped. */
-       WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
+       WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
 
        if (iter->level != PG_LEVEL_4K ||
            !is_shadow_present_pte(iter->old_spte))
@@ -1255,9 +1255,9 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
         */
        tdp_mmu_iter_set_spte(kvm, iter, 0);
 
-       if (!pte_write(range->pte)) {
+       if (!pte_write(range->arg.pte)) {
                new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
-                                                                 pte_pfn(range->pte));
+                                                                 pte_pfn(range->arg.pte));
 
                tdp_mmu_iter_set_spte(kvm, iter, new_spte);
        }
@@ -1548,8 +1548,8 @@ retry:
                if (!is_shadow_present_pte(iter.old_spte))
                        continue;
 
-               MMU_WARN_ON(kvm_ad_enabled() &&
-                           spte_ad_need_write_protect(iter.old_spte));
+               KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+                               spte_ad_need_write_protect(iter.old_spte));
 
                if (!(iter.old_spte & dbit))
                        continue;
@@ -1600,6 +1600,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
                                                   shadow_dirty_mask;
        struct tdp_iter iter;
 
+       lockdep_assert_held_write(&kvm->mmu_lock);
+
        rcu_read_lock();
 
        tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
@@ -1607,8 +1609,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
                if (!mask)
                        break;
 
-               MMU_WARN_ON(kvm_ad_enabled() &&
-                           spte_ad_need_write_protect(iter.old_spte));
+               KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+                               spte_ad_need_write_protect(iter.old_spte));
 
                if (iter.level > PG_LEVEL_4K ||
                    !(mask & (1UL << (iter.gfn - gfn))))
@@ -1646,7 +1648,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 {
        struct kvm_mmu_page *root;
 
-       lockdep_assert_held_write(&kvm->mmu_lock);
        for_each_tdp_mmu_root(kvm, root, slot->as_id)
                clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
 }
index bf653df..edb89b5 100644 (file)
@@ -382,9 +382,6 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
        struct kvm_x86_pmu_event_filter *filter;
        struct kvm *kvm = pmc->vcpu->kvm;
 
-       if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
-               return false;
-
        filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
        if (!filter)
                return true;
@@ -398,6 +395,7 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
 {
        return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
+              static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
               check_pmu_event_filter(pmc);
 }
 
index 56cbdb2..b816506 100644 (file)
@@ -43,6 +43,7 @@ enum kvm_only_cpuid_leafs {
 /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
 #define X86_FEATURE_AVX_VNNI_INT8       KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
 #define X86_FEATURE_AVX_NE_CONVERT      KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
+#define X86_FEATURE_AMX_COMPLEX         KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
 #define X86_FEATURE_PREFETCHITI         KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
 
 /* CPUID level 0x80000007 (EDX). */
index cfc8ab7..2092db8 100644 (file)
@@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
        int ret = 0;
        unsigned long flags;
        struct amd_svm_iommu_ir *ir;
+       u64 entry;
 
        /**
         * In some cases, the existing irte is updated and re-set,
@@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
        ir->data = pi->ir_data;
 
        spin_lock_irqsave(&svm->ir_list_lock, flags);
+
+       /*
+        * Update the target pCPU for IOMMU doorbells if the vCPU is running.
+        * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
+        * will update the pCPU info when the vCPU awkened and/or scheduled in.
+        * See also avic_vcpu_load().
+        */
+       entry = READ_ONCE(*(svm->avic_physical_id_cache));
+       if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
+               amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
+                                   true, pi->ir_data);
+
        list_add(&ir->node, &svm->ir_list);
        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 out:
@@ -986,10 +999,11 @@ static inline int
 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 {
        int ret = 0;
-       unsigned long flags;
        struct amd_svm_iommu_ir *ir;
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       lockdep_assert_held(&svm->ir_list_lock);
+
        if (!kvm_arch_has_assigned_device(vcpu->kvm))
                return 0;
 
@@ -997,19 +1011,15 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
         * Here, we go through the per-vcpu ir_list to update all existing
         * interrupt remapping table entry targeting this vcpu.
         */
-       spin_lock_irqsave(&svm->ir_list_lock, flags);
-
        if (list_empty(&svm->ir_list))
-               goto out;
+               return 0;
 
        list_for_each_entry(ir, &svm->ir_list, node) {
                ret = amd_iommu_update_ga(cpu, r, ir->data);
                if (ret)
-                       break;
+                       return ret;
        }
-out:
-       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
-       return ret;
+       return 0;
 }
 
 void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1017,6 +1027,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        u64 entry;
        int h_physical_id = kvm_cpu_get_apicid(cpu);
        struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long flags;
 
        lockdep_assert_preemption_disabled();
 
@@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        if (kvm_vcpu_is_blocking(vcpu))
                return;
 
+       /*
+        * Grab the per-vCPU interrupt remapping lock even if the VM doesn't
+        * _currently_ have assigned devices, as that can change.  Holding
+        * ir_list_lock ensures that either svm_ir_list_add() will consume
+        * up-to-date entry information, or that this task will wait until
+        * svm_ir_list_add() completes to set the new target pCPU.
+        */
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+
        entry = READ_ONCE(*(svm->avic_physical_id_cache));
        WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 
@@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
        avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
+
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
 
 void avic_vcpu_put(struct kvm_vcpu *vcpu)
 {
        u64 entry;
        struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long flags;
 
        lockdep_assert_preemption_disabled();
 
+       /*
+        * Note, reading the Physical ID entry outside of ir_list_lock is safe
+        * as only the pCPU that has loaded (or is loading) the vCPU is allowed
+        * to modify the entry, and preemption is disabled.  I.e. the vCPU
+        * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
+        * recursively.
+        */
        entry = READ_ONCE(*(svm->avic_physical_id_cache));
 
        /* Nothing to do if IsRunning == '0' due to vCPU blocking. */
        if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
                return;
 
+       /*
+        * Take and hold the per-vCPU interrupt remapping lock while updating
+        * the Physical ID entry even though the lock doesn't protect against
+        * multiple writers (see above).  Holding ir_list_lock ensures that
+        * either svm_ir_list_add() will consume up-to-date entry information,
+        * or that this task will wait until svm_ir_list_add() completes to
+        * mark the vCPU as not running.
+        */
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+
        avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
 
        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+
 }
 
 void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
index 96936dd..dd496c9 100644 (file)
@@ -107,7 +107,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
 
 static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
 {
-       if (!svm->v_vmload_vmsave_enabled)
+       if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
                return true;
 
        if (!nested_npt_enabled(svm))
@@ -552,6 +552,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
        bool new_vmcb12 = false;
        struct vmcb *vmcb01 = svm->vmcb01.ptr;
        struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
 
        nested_vmcb02_compute_g_pat(svm);
 
@@ -577,18 +578,18 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
                vmcb_mark_dirty(vmcb02, VMCB_DT);
        }
 
-       kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
+       kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
 
-       svm_set_efer(&svm->vcpu, svm->nested.save.efer);
+       svm_set_efer(vcpu, svm->nested.save.efer);
 
-       svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
-       svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
+       svm_set_cr0(vcpu, svm->nested.save.cr0);
+       svm_set_cr4(vcpu, svm->nested.save.cr4);
 
        svm->vcpu.arch.cr2 = vmcb12->save.cr2;
 
-       kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
-       kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
-       kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
+       kvm_rax_write(vcpu, vmcb12->save.rax);
+       kvm_rsp_write(vcpu, vmcb12->save.rsp);
+       kvm_rip_write(vcpu, vmcb12->save.rip);
 
        /* In case we don't even reach vcpu_run, the fields are not updated */
        vmcb02->save.rax = vmcb12->save.rax;
@@ -602,7 +603,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
                vmcb_mark_dirty(vmcb02, VMCB_DR);
        }
 
-       if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+       if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+                    (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
                /*
                 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
                 * svm_set_msr's definition of reserved bits.
@@ -658,7 +660,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
         * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
         */
 
-       if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
+       if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
+           (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
                int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
        else
                int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
@@ -695,10 +698,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 
        vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
 
-       if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
-               WARN_ON(!svm->tsc_scaling_enabled);
+       if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+           svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
                nested_svm_update_tsc_ratio_msr(vcpu);
-       }
 
        vmcb02->control.int_ctl             =
                (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
@@ -717,7 +719,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
         * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
         * prior to injecting the event).
         */
-       if (svm->nrips_enabled)
+       if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
                vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
        else if (boot_cpu_has(X86_FEATURE_NRIPS))
                vmcb02->control.next_rip    = vmcb12_rip;
@@ -727,7 +729,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
                svm->soft_int_injected = true;
                svm->soft_int_csbase = vmcb12_csbase;
                svm->soft_int_old_rip = vmcb12_rip;
-               if (svm->nrips_enabled)
+               if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
                        svm->soft_int_next_rip = svm->nested.ctl.next_rip;
                else
                        svm->soft_int_next_rip = vmcb12_rip;
@@ -735,15 +737,21 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 
        vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
                                              LBR_CTL_ENABLE_MASK;
-       if (svm->lbrv_enabled)
+       if (guest_can_use(vcpu, X86_FEATURE_LBRV))
                vmcb02->control.virt_ext  |=
                        (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
 
        if (!nested_vmcb_needs_vls_intercept(svm))
                vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
 
-       pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
-       pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
+       if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
+               pause_count12 = svm->nested.ctl.pause_filter_count;
+       else
+               pause_count12 = 0;
+       if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
+               pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
+       else
+               pause_thresh12 = 0;
        if (kvm_pause_in_guest(svm->vcpu.kvm)) {
                /* use guest values since host doesn't intercept PAUSE */
                vmcb02->control.pause_filter_count = pause_count12;
@@ -1027,7 +1035,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        if (vmcb12->control.exit_code != SVM_EXIT_ERR)
                nested_save_pending_event_to_vmcb12(svm, vmcb12);
 
-       if (svm->nrips_enabled)
+       if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
                vmcb12->control.next_rip  = vmcb02->control.next_rip;
 
        vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
@@ -1066,7 +1074,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        if (!nested_exit_on_intr(svm))
                kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 
-       if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+       if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+                    (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
                svm_copy_lbrs(vmcb12, vmcb02);
                svm_update_lbrv(vcpu);
        } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
@@ -1101,10 +1110,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
                vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
        }
 
-       if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
-               WARN_ON(!svm->tsc_scaling_enabled);
+       if (kvm_caps.has_tsc_control &&
+           vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
                vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
-               __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+               svm_write_tsc_multiplier(vcpu);
        }
 
        svm->nested.ctl.nested_cr3 = 0;
@@ -1537,7 +1546,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
        vcpu->arch.tsc_scaling_ratio =
                kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
                                               svm->tsc_ratio_msr);
-       __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+       svm_write_tsc_multiplier(vcpu);
 }
 
 /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
index d3aec1f..b9a0a93 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/pkru.h>
 #include <asm/trapnr.h>
 #include <asm/fpu/xcr.h>
+#include <asm/debugreg.h>
 
 #include "mmu.h"
 #include "x86.h"
@@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
 /* enable/disable SEV-ES support */
 static bool sev_es_enabled = true;
 module_param_named(sev_es, sev_es_enabled, bool, 0444);
+
+/* enable/disable SEV-ES DebugSwap support */
+static bool sev_es_debug_swap_enabled = true;
+module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
 #else
 #define sev_enabled false
 #define sev_es_enabled false
+#define sev_es_debug_swap_enabled false
 #endif /* CONFIG_KVM_AMD_SEV */
 
 static u8 sev_enc_bit;
@@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
        save->xss  = svm->vcpu.arch.ia32_xss;
        save->dr6  = svm->vcpu.arch.dr6;
 
+       if (sev_es_debug_swap_enabled)
+               save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+
        pr_debug("Virtual Machine Save Area (VMSA):\n");
        print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
 
@@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
        struct vcpu_svm *svm = to_svm(vcpu);
        int ret;
 
+       if (vcpu->guest_debug) {
+               pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported");
+               return -EINVAL;
+       }
+
        /* Perform some pre-encryption checks against the VMSA */
        ret = sev_es_sync_vmsa(svm);
        if (ret)
@@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
                 * Note, the source is not required to have the same number of
                 * vCPUs as the destination when migrating a vanilla SEV VM.
                 */
-               src_vcpu = kvm_get_vcpu(dst_kvm, i);
+               src_vcpu = kvm_get_vcpu(src_kvm, i);
                src_svm = to_svm(src_vcpu);
 
                /*
@@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void)
        bool sev_es_supported = false;
        bool sev_supported = false;
 
-       if (!sev_enabled || !npt_enabled)
+       if (!sev_enabled || !npt_enabled || !nrips)
                goto out;
 
        /*
@@ -2256,6 +2270,9 @@ out:
 
        sev_enabled = sev_supported;
        sev_es_enabled = sev_es_supported;
+       if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
+           !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
+               sev_es_debug_swap_enabled = false;
 #endif
 }
 
@@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
                                            svm->sev_es.ghcb_sa);
                break;
        case SVM_VMGEXIT_NMI_COMPLETE:
-               ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
+               ++vcpu->stat.nmi_window_exits;
+               svm->nmi_masked = false;
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+               ret = 1;
                break;
        case SVM_VMGEXIT_AP_HLT_LOOP:
                ret = kvm_emulate_ap_reset_hold(vcpu);
@@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
 
 static void sev_es_init_vmcb(struct vcpu_svm *svm)
 {
+       struct vmcb *vmcb = svm->vmcb01.ptr;
        struct kvm_vcpu *vcpu = &svm->vcpu;
 
        svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
@@ -2952,9 +2973,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
        /*
         * An SEV-ES guest requires a VMSA area that is a separate from the
         * VMCB page. Do not include the encryption mask on the VMSA physical
-        * address since hardware will access it using the guest key.
+        * address since hardware will access it using the guest key.  Note,
+        * the VMSA will be NULL if this vCPU is the destination for intrahost
+        * migration, and will be copied later.
         */
-       svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+       if (svm->sev_es.vmsa)
+               svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
 
        /* Can't intercept CR register access, HV can't modify CR registers */
        svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
        svm_set_intercept(svm, TRAP_CR4_WRITE);
        svm_set_intercept(svm, TRAP_CR8_WRITE);
 
-       /* No support for enable_vmware_backdoor */
-       clr_exception_intercept(svm, GP_VECTOR);
+       vmcb->control.intercepts[INTERCEPT_DR] = 0;
+       if (!sev_es_debug_swap_enabled) {
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+               recalc_intercepts(svm);
+       } else {
+               /*
+                * Disable #DB intercept iff DebugSwap is enabled.  KVM doesn't
+                * allow debugging SEV-ES guests, and enables DebugSwap iff
+                * NO_NESTED_DATA_BP is supported, so there's no reason to
+                * intercept #DB when DebugSwap is enabled.  For simplicity
+                * with respect to guest debug, intercept #DB for other VMs
+                * even if NO_NESTED_DATA_BP is supported, i.e. even if the
+                * guest can't DoS the CPU with infinite #DB vectoring.
+                */
+               clr_exception_intercept(svm, DB_VECTOR);
+       }
 
        /* Can't intercept XSETBV, HV can't modify XCR0 directly */
        svm_clr_intercept(svm, INTERCEPT_XSETBV);
@@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
        svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
        clr_exception_intercept(svm, UD_VECTOR);
 
+       /*
+        * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
+        * KVM can't decrypt guest memory to decode the faulting instruction.
+        */
+       clr_exception_intercept(svm, GP_VECTOR);
+
        if (sev_es_guest(svm->vcpu.kvm))
                sev_es_init_vmcb(svm);
 }
@@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
 void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
 {
        /*
-        * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
-        * of which one step is to perform a VMLOAD.  KVM performs the
-        * corresponding VMSAVE in svm_prepare_guest_switch for both
-        * traditional and SEV-ES guests.
+        * All host state for SEV-ES guests is categorized into three swap types
+        * based on how it is handled by hardware during a world switch:
+        *
+        * A: VMRUN:   Host state saved in host save area
+        *    VMEXIT:  Host state loaded from host save area
+        *
+        * B: VMRUN:   Host state _NOT_ saved in host save area
+        *    VMEXIT:  Host state loaded from host save area
+        *
+        * C: VMRUN:   Host state _NOT_ saved in host save area
+        *    VMEXIT:  Host state initialized to default(reset) values
+        *
+        * Manually save type-B state, i.e. state that is loaded by VMEXIT but
+        * isn't saved by VMRUN, unless it is already saved by VMSAVE (performed
+        * by common SVM code).
         */
-
-       /* XCR0 is restored on VMEXIT, save the current host value */
        hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-
-       /* PKRU is restored on VMEXIT, save the current host value */
        hostsa->pkru = read_pkru();
-
-       /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
        hostsa->xss = host_xss;
+
+       /*
+        * If DebugSwap is enabled, debug registers are loaded but NOT saved by
+        * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
+        * saves and loads debug registers (Type-A).
+        */
+       if (sev_es_debug_swap_enabled) {
+               hostsa->dr0 = native_get_debugreg(0);
+               hostsa->dr1 = native_get_debugreg(1);
+               hostsa->dr2 = native_get_debugreg(2);
+               hostsa->dr3 = native_get_debugreg(3);
+               hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
+               hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
+               hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
+               hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
+       }
 }
 
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
index d4bfdc6..f283eb4 100644 (file)
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
 #include <asm/traps.h>
+#include <asm/reboot.h>
 #include <asm/fpu/api.h>
 
-#include <asm/virtext.h>
-
 #include <trace/events/ipi.h>
 
 #include "trace.h"
@@ -203,7 +202,7 @@ static int nested = true;
 module_param(nested, int, S_IRUGO);
 
 /* enable/disable Next RIP Save */
-static int nrips = true;
+int nrips = true;
 module_param(nrips, int, 0444);
 
 /* enable/disable Virtual VMLOAD VMSAVE */
@@ -365,6 +364,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
                svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
 
 }
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+                                       void *insn, int insn_len);
 
 static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
                                           bool commit_side_effects)
@@ -385,6 +386,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
        }
 
        if (!svm->next_rip) {
+               /*
+                * FIXME: Drop this when kvm_emulate_instruction() does the
+                * right thing and treats "can't emulate" as outright failure
+                * for EMULTYPE_SKIP.
+                */
+               if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
+                       return 0;
+
                if (unlikely(!commit_side_effects))
                        old_rflags = svm->vmcb->save.rflags;
 
@@ -517,14 +526,21 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu)
                vcpu->arch.osvw.status |= 1;
 }
 
-static bool kvm_is_svm_supported(void)
+static bool __kvm_is_svm_supported(void)
 {
-       int cpu = raw_smp_processor_id();
-       const char *msg;
+       int cpu = smp_processor_id();
+       struct cpuinfo_x86 *c = &cpu_data(cpu);
+
        u64 vm_cr;
 
-       if (!cpu_has_svm(&msg)) {
-               pr_err("SVM not supported by CPU %d, %s\n", cpu, msg);
+       if (c->x86_vendor != X86_VENDOR_AMD &&
+           c->x86_vendor != X86_VENDOR_HYGON) {
+               pr_err("CPU %d isn't AMD or Hygon\n", cpu);
+               return false;
+       }
+
+       if (!cpu_has(c, X86_FEATURE_SVM)) {
+               pr_err("SVM not supported by CPU %d\n", cpu);
                return false;
        }
 
@@ -542,25 +558,55 @@ static bool kvm_is_svm_supported(void)
        return true;
 }
 
+static bool kvm_is_svm_supported(void)
+{
+       bool supported;
+
+       migrate_disable();
+       supported = __kvm_is_svm_supported();
+       migrate_enable();
+
+       return supported;
+}
+
 static int svm_check_processor_compat(void)
 {
-       if (!kvm_is_svm_supported())
+       if (!__kvm_is_svm_supported())
                return -EIO;
 
        return 0;
 }
 
-void __svm_write_tsc_multiplier(u64 multiplier)
+static void __svm_write_tsc_multiplier(u64 multiplier)
 {
-       preempt_disable();
-
        if (multiplier == __this_cpu_read(current_tsc_ratio))
-               goto out;
+               return;
 
        wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
        __this_cpu_write(current_tsc_ratio, multiplier);
-out:
-       preempt_enable();
+}
+
+static inline void kvm_cpu_svm_disable(void)
+{
+       uint64_t efer;
+
+       wrmsrl(MSR_VM_HSAVE_PA, 0);
+       rdmsrl(MSR_EFER, efer);
+       if (efer & EFER_SVME) {
+               /*
+                * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
+                * NMI aren't blocked.
+                */
+               stgi();
+               wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+       }
+}
+
+static void svm_emergency_disable(void)
+{
+       kvm_rebooting = true;
+
+       kvm_cpu_svm_disable();
 }
 
 static void svm_hardware_disable(void)
@@ -569,7 +615,7 @@ static void svm_hardware_disable(void)
        if (tsc_scaling)
                __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
 
-       cpu_svm_disable();
+       kvm_cpu_svm_disable();
 
        amd_pmu_disable_virt();
 }
@@ -677,6 +723,39 @@ free_save_area:
 
 }
 
+static void set_dr_intercepts(struct vcpu_svm *svm)
+{
+       struct vmcb *vmcb = svm->vmcb01.ptr;
+
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+
+       recalc_intercepts(svm);
+}
+
+static void clr_dr_intercepts(struct vcpu_svm *svm)
+{
+       struct vmcb *vmcb = svm->vmcb01.ptr;
+
+       vmcb->control.intercepts[INTERCEPT_DR] = 0;
+
+       recalc_intercepts(svm);
+}
+
 static int direct_access_msr_slot(u32 msr)
 {
        u32 i;
@@ -947,50 +1026,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
                svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
 }
 
-static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
+static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
 {
        /*
-        * If the LBR virtualization is disabled, the LBR msrs are always
-        * kept in the vmcb01 to avoid copying them on nested guest entries.
-        *
-        * If nested, and the LBR virtualization is enabled/disabled, the msrs
-        * are moved between the vmcb01 and vmcb02 as needed.
+        * If LBR virtualization is disabled, the LBR MSRs are always kept in
+        * vmcb01.  If LBR virtualization is enabled and L1 is running VMs of
+        * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
         */
-       struct vmcb *vmcb =
-               (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
-                       svm->vmcb : svm->vmcb01.ptr;
-
-       switch (index) {
-       case MSR_IA32_DEBUGCTLMSR:
-               return vmcb->save.dbgctl;
-       case MSR_IA32_LASTBRANCHFROMIP:
-               return vmcb->save.br_from;
-       case MSR_IA32_LASTBRANCHTOIP:
-               return vmcb->save.br_to;
-       case MSR_IA32_LASTINTFROMIP:
-               return vmcb->save.last_excp_from;
-       case MSR_IA32_LASTINTTOIP:
-               return vmcb->save.last_excp_to;
-       default:
-               KVM_BUG(false, svm->vcpu.kvm,
-                       "%s: Unknown MSR 0x%x", __func__, index);
-               return 0;
-       }
+       return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
+                                                                  svm->vmcb01.ptr;
 }
 
 void svm_update_lbrv(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-
-       bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
-                                          DEBUGCTLMSR_LBR;
-
-       bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
-                                     LBR_CTL_ENABLE_MASK);
-
-       if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
-               if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
-                       enable_lbrv = true;
+       bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
+       bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
+                           (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+                           (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
 
        if (enable_lbrv == current_enable_lbrv)
                return;
@@ -1101,21 +1154,23 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
        return svm->tsc_ratio_msr;
 }
 
-static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
-       svm->vmcb->control.tsc_offset = offset;
+       svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
        vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
-static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu)
 {
-       __svm_write_tsc_multiplier(multiplier);
+       preempt_disable();
+       if (to_svm(vcpu)->guest_state_loaded)
+               __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+       preempt_enable();
 }
 
-
 /* Evaluate instruction intercepts that depend on guest CPUID features. */
 static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
                                              struct vcpu_svm *svm)
@@ -1156,8 +1211,6 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
 
                set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
                set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
-
-               svm->v_vmload_vmsave_enabled = false;
        } else {
                /*
                 * If hardware supports Virtual VMLOAD VMSAVE then enable it
@@ -1201,10 +1254,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
         * Guest access to VMware backdoor ports could legitimately
         * trigger #GP because of TSS I/O permission bitmap.
         * We intercept those #GP and allow access to them anyway
-        * as VMware does.  Don't intercept #GP for SEV guests as KVM can't
-        * decrypt guest memory to decode the faulting instruction.
+        * as VMware does.
         */
-       if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
+       if (enable_vmware_backdoor)
                set_exception_intercept(svm, GP_VECTOR);
 
        svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1949,7 +2001,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       if (vcpu->arch.guest_state_protected)
+       if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
                return;
 
        get_debugreg(vcpu->arch.db[0], 0);
@@ -2510,12 +2562,13 @@ static int iret_interception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       WARN_ON_ONCE(sev_es_guest(vcpu->kvm));
+
        ++vcpu->stat.nmi_window_exits;
        svm->awaiting_iret_completion = true;
 
        svm_clr_iret_intercept(svm);
-       if (!sev_es_guest(vcpu->kvm))
-               svm->nmi_iret_rip = kvm_rip_read(vcpu);
+       svm->nmi_iret_rip = kvm_rip_read(vcpu);
 
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        return 1;
@@ -2680,6 +2733,13 @@ static int dr_interception(struct kvm_vcpu *vcpu)
        unsigned long val;
        int err = 0;
 
+       /*
+        * SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT
+        * for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return 1;
+
        if (vcpu->guest_debug == 0) {
                /*
                 * No more DR vmexits; force a reload of the debug registers
@@ -2764,7 +2824,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
        switch (msr_info->index) {
        case MSR_AMD64_TSC_RATIO:
-               if (!msr_info->host_initiated && !svm->tsc_scaling_enabled)
+               if (!msr_info->host_initiated &&
+                   !guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR))
                        return 1;
                msr_info->data = svm->tsc_ratio_msr;
                break;
@@ -2802,11 +2863,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                msr_info->data = svm->tsc_aux;
                break;
        case MSR_IA32_DEBUGCTLMSR:
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
+               break;
        case MSR_IA32_LASTBRANCHFROMIP:
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
+               break;
        case MSR_IA32_LASTBRANCHTOIP:
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
+               break;
        case MSR_IA32_LASTINTFROMIP:
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
+               break;
        case MSR_IA32_LASTINTTOIP:
-               msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
                break;
        case MSR_VM_HSAVE_PA:
                msr_info->data = svm->nested.hsave_msr;
@@ -2906,7 +2975,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
        switch (ecx) {
        case MSR_AMD64_TSC_RATIO:
 
-               if (!svm->tsc_scaling_enabled) {
+               if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) {
 
                        if (!msr->host_initiated)
                                return 1;
@@ -2928,7 +2997,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 
                svm->tsc_ratio_msr = data;
 
-               if (svm->tsc_scaling_enabled && is_guest_mode(vcpu))
+               if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+                   is_guest_mode(vcpu))
                        nested_svm_update_tsc_ratio_msr(vcpu);
 
                break;
@@ -3037,13 +3107,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                if (data & DEBUGCTL_RESERVED_BITS)
                        return 1;
 
-               if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
-                       svm->vmcb->save.dbgctl = data;
-               else
-                       svm->vmcb01.ptr->save.dbgctl = data;
-
+               svm_get_lbr_vmcb(svm)->save.dbgctl = data;
                svm_update_lbrv(vcpu);
-
                break;
        case MSR_VM_HSAVE_PA:
                /*
@@ -3769,6 +3834,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
        if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
                return; /* IRET will cause a vm exit */
 
+       /*
+        * SEV-ES guests are responsible for signaling when a vCPU is ready to
+        * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
+        * KVM can't intercept and single-step IRET to detect when NMIs are
+        * unblocked (architecturally speaking).  See SVM_VMGEXIT_NMI_COMPLETE.
+        *
+        * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
+        * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
+        * supported NAEs in the GHCB protocol.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return;
+
        if (!gif_set(svm)) {
                if (vgif)
                        svm_set_intercept(svm, INTERCEPT_STGI);
@@ -3918,12 +3996,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
        svm->soft_int_injected = false;
 
        /*
-        * If we've made progress since setting HF_IRET_MASK, we've
+        * If we've made progress since setting awaiting_iret_completion, we've
         * executed an IRET and can allow NMI injection.
         */
        if (svm->awaiting_iret_completion &&
-           (sev_es_guest(vcpu->kvm) ||
-            kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
+           kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
                svm->awaiting_iret_completion = false;
                svm->nmi_masked = false;
                kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -4209,28 +4286,37 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        struct kvm_cpuid_entry2 *best;
 
-       vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-                                   boot_cpu_has(X86_FEATURE_XSAVE) &&
-                                   boot_cpu_has(X86_FEATURE_XSAVES);
-
-       /* Update nrips enabled cache */
-       svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
-                            guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
-
-       svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
-       svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
-
-       svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
-
-       svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
+       /*
+        * SVM doesn't provide a way to disable just XSAVES in the guest; KVM
+        * can only disable all variants of XSAVE by disallowing CR4.OSXSAVE from
+        * being set.  As a result, if the host has XSAVE and XSAVES, and the
+        * guest has XSAVE enabled, the guest can execute XSAVES without
+        * faulting.  Treat XSAVES as enabled in this case regardless of
+        * whether it's advertised to the guest so that KVM context switches
+        * XSS on VM-Enter/VM-Exit.  Failure to do so would effectively give
+        * the guest read/write access to the host's XSS.
+        */
+       if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+           boot_cpu_has(X86_FEATURE_XSAVES) &&
+           guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+               kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES);
 
-       svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
 
-       svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
+       /*
+        * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
+        * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
+        * SVM on Intel is bonkers and extremely unlikely to work).
+        */
+       if (!guest_cpuid_is_intel(vcpu))
+               kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
 
-       svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI);
 
        svm_recalc_instruction_intercepts(vcpu, svm);
 
@@ -4651,16 +4737,25 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
         * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
         * decode garbage.
         *
-        * Inject #UD if KVM reached this point without an instruction buffer.
-        * In practice, this path should never be hit by a well-behaved guest,
-        * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
-        * is still theoretically reachable, e.g. via unaccelerated fault-like
-        * AVIC access, and needs to be handled by KVM to avoid putting the
-        * guest into an infinite loop.   Injecting #UD is somewhat arbitrary,
-        * but its the least awful option given lack of insight into the guest.
+        * If KVM is NOT trying to simply skip an instruction, inject #UD if
+        * KVM reached this point without an instruction buffer.  In practice,
+        * this path should never be hit by a well-behaved guest, e.g. KVM
+        * doesn't intercept #UD or #GP for SEV guests, but this path is still
+        * theoretically reachable, e.g. via unaccelerated fault-like AVIC
+        * access, and needs to be handled by KVM to avoid putting the guest
+        * into an infinite loop.  Injecting #UD is somewhat arbitrary, but
+        * it's the least awful option given lack of insight into the guest.
+        *
+        * If KVM is trying to skip an instruction, simply resume the guest.
+        * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
+        * will attempt to re-inject the INT3/INTO and skip the instruction.
+        * In that scenario, retrying the INT3/INTO and hoping the guest will
+        * make forward progress is the only option that has a chance of
+        * success (and in practice it will work the vast majority of the time).
         */
        if (unlikely(!insn)) {
-               kvm_queue_exception(vcpu, UD_VECTOR);
+               if (!(emul_type & EMULTYPE_SKIP))
+                       kvm_queue_exception(vcpu, UD_VECTOR);
                return false;
        }
 
@@ -5112,9 +5207,11 @@ static __init int svm_hardware_setup(void)
 
        svm_adjust_mmio_mask();
 
+       nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
+
        /*
         * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
-        * may be modified by svm_adjust_mmio_mask()).
+        * may be modified by svm_adjust_mmio_mask()), as well as nrips.
         */
        sev_hardware_setup();
 
@@ -5126,11 +5223,6 @@ static __init int svm_hardware_setup(void)
                        goto err;
        }
 
-       if (nrips) {
-               if (!boot_cpu_has(X86_FEATURE_NRIPS))
-                       nrips = false;
-       }
-
        enable_apicv = avic = avic && avic_hardware_setup();
 
        if (!enable_apicv) {
@@ -5213,6 +5305,13 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
        .pmu_ops = &amd_pmu_ops,
 };
 
+static void __svm_exit(void)
+{
+       kvm_x86_vendor_exit();
+
+       cpu_emergency_unregister_virt_callback(svm_emergency_disable);
+}
+
 static int __init svm_init(void)
 {
        int r;
@@ -5226,6 +5325,8 @@ static int __init svm_init(void)
        if (r)
                return r;
 
+       cpu_emergency_register_virt_callback(svm_emergency_disable);
+
        /*
         * Common KVM initialization _must_ come last, after this, /dev/kvm is
         * exposed to userspace!
@@ -5238,14 +5339,14 @@ static int __init svm_init(void)
        return 0;
 
 err_kvm_init:
-       kvm_x86_vendor_exit();
+       __svm_exit();
        return r;
 }
 
 static void __exit svm_exit(void)
 {
        kvm_exit();
-       kvm_x86_vendor_exit();
+       __svm_exit();
 }
 
 module_init(svm_init)
index 8239c8d..f412539 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/svm.h>
 #include <asm/sev-common.h>
 
+#include "cpuid.h"
 #include "kvm_cache_regs.h"
 
 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
@@ -33,6 +34,7 @@
 #define MSRPM_OFFSETS  32
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
+extern int nrips;
 extern int vgif;
 extern bool intercept_smi;
 extern bool x2avic_enabled;
@@ -260,16 +262,6 @@ struct vcpu_svm {
        unsigned long soft_int_next_rip;
        bool soft_int_injected;
 
-       /* optional nested SVM features that are enabled for this guest  */
-       bool nrips_enabled                : 1;
-       bool tsc_scaling_enabled          : 1;
-       bool v_vmload_vmsave_enabled      : 1;
-       bool lbrv_enabled                 : 1;
-       bool pause_filter_enabled         : 1;
-       bool pause_threshold_enabled      : 1;
-       bool vgif_enabled                 : 1;
-       bool vnmi_enabled                 : 1;
-
        u32 ldr_reg;
        u32 dfr_reg;
        struct page *avic_backing_page;
@@ -406,48 +398,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3
        return test_bit(bit, (unsigned long *)&control->intercepts);
 }
 
-static inline void set_dr_intercepts(struct vcpu_svm *svm)
-{
-       struct vmcb *vmcb = svm->vmcb01.ptr;
-
-       if (!sev_es_guest(svm->vcpu.kvm)) {
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
-       }
-
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
-
-       recalc_intercepts(svm);
-}
-
-static inline void clr_dr_intercepts(struct vcpu_svm *svm)
-{
-       struct vmcb *vmcb = svm->vmcb01.ptr;
-
-       vmcb->control.intercepts[INTERCEPT_DR] = 0;
-
-       /* DR7 access must remain intercepted for an SEV-ES guest */
-       if (sev_es_guest(svm->vcpu.kvm)) {
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
-       }
-
-       recalc_intercepts(svm);
-}
-
 static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
 {
        struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -493,7 +443,8 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
 
 static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
 {
-       return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
+       return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
+              (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
 }
 
 static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
@@ -544,7 +495,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
 
 static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
 {
-       return svm->vnmi_enabled &&
+       return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
               (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
 }
 
@@ -660,7 +611,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                               bool has_error_code, u32 error_code);
 int nested_svm_exit_special(struct vcpu_svm *svm);
 void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
-void __svm_write_tsc_multiplier(u64 multiplier);
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu);
 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
                                       struct vmcb_control_area *control);
 void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
index d0abee3..41a4533 100644 (file)
@@ -252,7 +252,7 @@ static inline bool cpu_has_vmx_pml(void)
 static inline bool cpu_has_vmx_xsaves(void)
 {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
-               SECONDARY_EXEC_XSAVES;
+               SECONDARY_EXEC_ENABLE_XSAVES;
 }
 
 static inline bool cpu_has_vmx_waitpkg(void)
index 79450e1..313b8bb 100644 (file)
@@ -78,7 +78,7 @@
         SECONDARY_EXEC_DESC |                                          \
         SECONDARY_EXEC_ENABLE_RDTSCP |                                 \
         SECONDARY_EXEC_ENABLE_INVPCID |                                \
-        SECONDARY_EXEC_XSAVES |                                        \
+        SECONDARY_EXEC_ENABLE_XSAVES |                                 \
         SECONDARY_EXEC_RDSEED_EXITING |                                \
         SECONDARY_EXEC_RDRAND_EXITING |                                \
         SECONDARY_EXEC_TSC_SCALING |                                   \
index 516391c..c5ec0ef 100644 (file)
@@ -2307,7 +2307,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
                                  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                                  SECONDARY_EXEC_ENABLE_INVPCID |
                                  SECONDARY_EXEC_ENABLE_RDTSCP |
-                                 SECONDARY_EXEC_XSAVES |
+                                 SECONDARY_EXEC_ENABLE_XSAVES |
                                  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                                  SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -6331,7 +6331,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
                 * If if it were, XSS would have to be checked against
                 * the XSS exit bitmap in vmcs12.
                 */
-               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
        case EXIT_REASON_UMWAIT:
        case EXIT_REASON_TPAUSE:
                return nested_cpu_has2(vmcs12,
@@ -6426,7 +6426,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
        vmx = to_vmx(vcpu);
        vmcs12 = get_vmcs12(vcpu);
 
-       if (nested_vmx_allowed(vcpu) &&
+       if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
            (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
                kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
                kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
@@ -6567,7 +6567,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
                if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
                        return -EINVAL;
        } else {
-               if (!nested_vmx_allowed(vcpu))
+               if (!guest_can_use(vcpu, X86_FEATURE_VMX))
                        return -EINVAL;
 
                if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
@@ -6601,7 +6601,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
-               (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
+           (!guest_can_use(vcpu, X86_FEATURE_VMX) ||
+            !vmx->nested.enlightened_vmcs_enabled))
                        return -EINVAL;
 
        vmx_leave_nested(vcpu);
@@ -6874,7 +6875,7 @@ static void nested_vmx_setup_secondary_ctls(u32 ept_caps,
                SECONDARY_EXEC_ENABLE_INVPCID |
                SECONDARY_EXEC_ENABLE_VMFUNC |
                SECONDARY_EXEC_RDSEED_EXITING |
-               SECONDARY_EXEC_XSAVES |
+               SECONDARY_EXEC_ENABLE_XSAVES |
                SECONDARY_EXEC_TSC_SCALING |
                SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
 
index 9695226..b4b9d51 100644 (file)
@@ -168,7 +168,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 
 static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
 {
-       return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+       return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
 }
 
 static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
index 80c769c..f2efa0b 100644 (file)
 
 #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
 
+enum intel_pmu_architectural_events {
+       /*
+        * The order of the architectural events matters as support for each
+        * event is enumerated via CPUID using the index of the event.
+        */
+       INTEL_ARCH_CPU_CYCLES,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED,
+       INTEL_ARCH_REFERENCE_CYCLES,
+       INTEL_ARCH_LLC_REFERENCES,
+       INTEL_ARCH_LLC_MISSES,
+       INTEL_ARCH_BRANCHES_RETIRED,
+       INTEL_ARCH_BRANCHES_MISPREDICTED,
+
+       NR_REAL_INTEL_ARCH_EVENTS,
+
+       /*
+        * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
+        * TSC reference cycles.  The architectural reference cycles event may
+        * or may not actually use the TSC as the reference, e.g. might use the
+        * core crystal clock or the bus clock (yeah, "architectural").
+        */
+       PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
+       NR_INTEL_ARCH_EVENTS,
+};
+
 static struct {
        u8 eventsel;
        u8 unit_mask;
 } const intel_arch_events[] = {
-       [0] = { 0x3c, 0x00 },
-       [1] = { 0xc0, 0x00 },
-       [2] = { 0x3c, 0x01 },
-       [3] = { 0x2e, 0x4f },
-       [4] = { 0x2e, 0x41 },
-       [5] = { 0xc4, 0x00 },
-       [6] = { 0xc5, 0x00 },
-       /* The above index must match CPUID 0x0A.EBX bit vector */
-       [7] = { 0x00, 0x03 },
+       [INTEL_ARCH_CPU_CYCLES]                 = { 0x3c, 0x00 },
+       [INTEL_ARCH_INSTRUCTIONS_RETIRED]       = { 0xc0, 0x00 },
+       [INTEL_ARCH_REFERENCE_CYCLES]           = { 0x3c, 0x01 },
+       [INTEL_ARCH_LLC_REFERENCES]             = { 0x2e, 0x4f },
+       [INTEL_ARCH_LLC_MISSES]                 = { 0x2e, 0x41 },
+       [INTEL_ARCH_BRANCHES_RETIRED]           = { 0xc4, 0x00 },
+       [INTEL_ARCH_BRANCHES_MISPREDICTED]      = { 0xc5, 0x00 },
+       [PSEUDO_ARCH_REFERENCE_CYCLES]          = { 0x00, 0x03 },
 };
 
 /* mapping between fixed pmc index and intel_arch_events array */
-static int fixed_pmc_events[] = {1, 0, 7};
+static int fixed_pmc_events[] = {
+       [0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
+       [1] = INTEL_ARCH_CPU_CYCLES,
+       [2] = PSEUDO_ARCH_REFERENCE_CYCLES,
+};
 
 static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 {
@@ -80,16 +108,18 @@ static bool intel_hw_event_available(struct kvm_pmc *pmc)
        u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
+       BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS);
+
+       /*
+        * Disallow events reported as unavailable in guest CPUID.  Note, this
+        * doesn't apply to pseudo-architectural events.
+        */
+       for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) {
                if (intel_arch_events[i].eventsel != event_select ||
                    intel_arch_events[i].unit_mask != unit_mask)
                        continue;
 
-               /* disable event that reported as not present by cpuid */
-               if ((i < 7) && !(pmu->available_event_types & (1 << i)))
-                       return false;
-
-               break;
+               return pmu->available_event_types & BIT(i);
        }
 
        return true;
@@ -438,16 +468,17 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
 {
-       size_t size = ARRAY_SIZE(fixed_pmc_events);
-       struct kvm_pmc *pmc;
-       u32 event;
        int i;
 
+       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
+
        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
-               pmc = &pmu->fixed_counters[i];
-               event = fixed_pmc_events[array_index_nospec(i, size)];
+               int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
+               struct kvm_pmc *pmc = &pmu->fixed_counters[index];
+               u32 event = fixed_pmc_events[index];
+
                pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
-                       intel_arch_events[event].eventsel;
+                                intel_arch_events[event].eventsel;
        }
 }
 
@@ -508,10 +539,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
        if (pmu->version == 1) {
                pmu->nr_arch_fixed_counters = 0;
        } else {
-               pmu->nr_arch_fixed_counters =
-                       min3(ARRAY_SIZE(fixed_pmc_events),
-                            (size_t) edx.split.num_counters_fixed,
-                            (size_t)kvm_pmu_cap.num_counters_fixed);
+               pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
+                                                   kvm_pmu_cap.num_counters_fixed);
                edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
                                                  kvm_pmu_cap.bit_width_fixed);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
index b483a8b..72e3943 100644 (file)
 #include <asm/idtentry.h>
 #include <asm/io.h>
 #include <asm/irq_remapping.h>
-#include <asm/kexec.h>
+#include <asm/reboot.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
 #include <asm/mshyperv.h>
 #include <asm/mwait.h>
 #include <asm/spec-ctrl.h>
-#include <asm/virtext.h>
 #include <asm/vmx.h>
 
 #include "capabilities.h"
@@ -237,9 +236,6 @@ static const struct {
 #define L1D_CACHE_ORDER 4
 static void *vmx_l1d_flush_pages;
 
-/* Control for disabling CPU Fill buffer clear */
-static bool __read_mostly vmx_fb_clear_ctrl_available;
-
 static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 {
        struct page *page;
@@ -255,14 +251,9 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
                return 0;
        }
 
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-               u64 msr;
-
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-               if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
-                       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
-                       return 0;
-               }
+       if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+               l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+               return 0;
        }
 
        /* If set to auto use the default l1tf mitigation method */
@@ -366,22 +357,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
 {
        if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
-               return sprintf(s, "???\n");
+               return sysfs_emit(s, "???\n");
 
-       return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
-}
-
-static void vmx_setup_fb_clear_ctrl(void)
-{
-       u64 msr;
-
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
-           !boot_cpu_has_bug(X86_BUG_MDS) &&
-           !boot_cpu_has_bug(X86_BUG_TAA)) {
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-               if (msr & ARCH_CAP_FB_CLEAR_CTRL)
-                       vmx_fb_clear_ctrl_available = true;
-       }
+       return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
 }
 
 static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
@@ -409,7 +387,9 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
 
 static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 {
-       vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+       vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+                               !boot_cpu_has_bug(X86_BUG_MDS) &&
+                               !boot_cpu_has_bug(X86_BUG_TAA);
 
        /*
         * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@@ -754,17 +734,51 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
        return ret;
 }
 
-#ifdef CONFIG_KEXEC_CORE
-static void crash_vmclear_local_loaded_vmcss(void)
+/*
+ * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
+ *
+ * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
+ * atomically track post-VMXON state, e.g. this may be called in NMI context.
+ * Eat all faults as all other faults on VMXOFF are mode related, i.e.
+ * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
+ * magically in RM, VM86, compat mode, or at CPL>0.
+ */
+static int kvm_cpu_vmxoff(void)
+{
+       asm_volatile_goto("1: vmxoff\n\t"
+                         _ASM_EXTABLE(1b, %l[fault])
+                         ::: "cc", "memory" : fault);
+
+       cr4_clear_bits(X86_CR4_VMXE);
+       return 0;
+
+fault:
+       cr4_clear_bits(X86_CR4_VMXE);
+       return -EIO;
+}
+
+static void vmx_emergency_disable(void)
 {
        int cpu = raw_smp_processor_id();
        struct loaded_vmcs *v;
 
+       kvm_rebooting = true;
+
+       /*
+        * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
+        * set in task context.  If this races with VMX being disabled by an
+        * NMI, VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due
+        * to kvm_rebooting being set.
+        */
+       if (!(__read_cr4() & X86_CR4_VMXE))
+               return;
+
        list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
                            loaded_vmcss_on_cpu_link)
                vmcs_clear(v->vmcs);
+
+       kvm_cpu_vmxoff();
 }
-#endif /* CONFIG_KEXEC_CORE */
 
 static void __loaded_vmcs_clear(void *arg)
 {
@@ -1899,25 +1913,14 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
        return kvm_caps.default_tsc_scaling_ratio;
 }
 
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
 {
-       vmcs_write64(TSC_OFFSET, offset);
+       vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 }
 
-static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
 {
-       vmcs_write64(TSC_MULTIPLIER, multiplier);
-}
-
-/*
- * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
- * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
- * all guests if the "nested" module option is off, and can also be disabled
- * for a single guest by disabling its VMX cpuid bit.
- */
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
-{
-       return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
+       vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
 }
 
 /*
@@ -2047,7 +2050,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
                break;
        case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
-               if (!nested_vmx_allowed(vcpu))
+               if (!guest_can_use(vcpu, X86_FEATURE_VMX))
                        return 1;
                if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
                                    &msr_info->data))
@@ -2355,7 +2358,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
                if (!msr_info->host_initiated)
                        return 1; /* they are read-only */
-               if (!nested_vmx_allowed(vcpu))
+               if (!guest_can_use(vcpu, X86_FEATURE_VMX))
                        return 1;
                return vmx_set_vmx_msr(vcpu, msr_index, data);
        case MSR_IA32_RTIT_CTL:
@@ -2729,11 +2732,11 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
        return 0;
 }
 
-static bool kvm_is_vmx_supported(void)
+static bool __kvm_is_vmx_supported(void)
 {
-       int cpu = raw_smp_processor_id();
+       int cpu = smp_processor_id();
 
-       if (!cpu_has_vmx()) {
+       if (!(cpuid_ecx(1) & feature_bit(VMX))) {
                pr_err("VMX not supported by CPU %d\n", cpu);
                return false;
        }
@@ -2747,13 +2750,24 @@ static bool kvm_is_vmx_supported(void)
        return true;
 }
 
+static bool kvm_is_vmx_supported(void)
+{
+       bool supported;
+
+       migrate_disable();
+       supported = __kvm_is_vmx_supported();
+       migrate_enable();
+
+       return supported;
+}
+
 static int vmx_check_processor_compat(void)
 {
        int cpu = raw_smp_processor_id();
        struct vmcs_config vmcs_conf;
        struct vmx_capability vmx_cap;
 
-       if (!kvm_is_vmx_supported())
+       if (!__kvm_is_vmx_supported())
                return -EIO;
 
        if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
@@ -2833,7 +2847,7 @@ static void vmx_hardware_disable(void)
 {
        vmclear_local_loaded_vmcss();
 
-       if (cpu_vmxoff())
+       if (kvm_cpu_vmxoff())
                kvm_spurious_fault();
 
        hv_reset_evmcs();
@@ -3071,13 +3085,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
        vmx->rmode.vm86_active = 1;
 
-       /*
-        * Very old userspace does not call KVM_SET_TSS_ADDR before entering
-        * vcpu. Warn the user that an update is overdue.
-        */
-       if (!kvm_vmx->tss_addr)
-               pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n");
-
        vmx_segment_cache_clear(vmx);
 
        vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
@@ -3350,7 +3357,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        vmx->emulation_required = vmx_emulation_required(vcpu);
 }
 
-static int vmx_get_max_tdp_level(void)
+static int vmx_get_max_ept_level(void)
 {
        if (cpu_has_vmx_ept_5levels())
                return 5;
@@ -4553,16 +4560,19 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
  * based on a single guest CPUID bit, with a dedicated feature bit.  This also
  * verifies that the control is actually supported by KVM and hardware.
  */
-#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
-({                                                                      \
-       bool __enabled;                                                  \
-                                                                        \
-       if (cpu_has_vmx_##name()) {                                      \
-               __enabled = guest_cpuid_has(&(vmx)->vcpu,                \
-                                           X86_FEATURE_##feat_name);    \
-               vmx_adjust_secondary_exec_control(vmx, exec_control,     \
-                       SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \
-       }                                                                \
+#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting)    \
+({                                                                                             \
+       struct kvm_vcpu *__vcpu = &(vmx)->vcpu;                                                 \
+       bool __enabled;                                                                         \
+                                                                                               \
+       if (cpu_has_vmx_##name()) {                                                             \
+               if (kvm_is_governed_feature(X86_FEATURE_##feat_name))                           \
+                       __enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name);             \
+               else                                                                            \
+                       __enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name);           \
+               vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\
+                                                 __enabled, exiting);                          \
+       }                                                                                       \
 })
 
 /* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
@@ -4622,19 +4632,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
        if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
                exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
-       if (cpu_has_vmx_xsaves()) {
-               /* Exposing XSAVES only when XSAVE is exposed */
-               bool xsaves_enabled =
-                       boot_cpu_has(X86_FEATURE_XSAVE) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
-
-               vcpu->arch.xsaves_enabled = xsaves_enabled;
-
-               vmx_adjust_secondary_exec_control(vmx, &exec_control,
-                                                 SECONDARY_EXEC_XSAVES,
-                                                 xsaves_enabled, false);
-       }
+       vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES);
 
        /*
         * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
@@ -4653,6 +4651,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
                                                  SECONDARY_EXEC_ENABLE_RDTSCP,
                                                  rdpid_or_rdtscp_enabled, false);
        }
+
        vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
 
        vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
@@ -6796,8 +6795,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
        vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
        read_unlock(&vcpu->kvm->mmu_lock);
 
-       vmx_flush_tlb_current(vcpu);
-
+       /*
+        * No need for a manual TLB flush at this point, KVM has already done a
+        * flush if there were SPTEs pointing at the previous page.
+        */
 out:
        /*
         * Do not pin apic access page in memory, the MMU notifier
@@ -7243,13 +7244,20 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                                   flags);
 
        vcpu->arch.cr2 = native_read_cr2();
+       vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
+
+       vmx->idt_vectoring_info = 0;
 
        vmx_enable_fb_clear(vmx);
 
-       if (unlikely(vmx->fail))
+       if (unlikely(vmx->fail)) {
                vmx->exit_reason.full = 0xdead;
-       else
-               vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+               goto out;
+       }
+
+       vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+       if (likely(!vmx->exit_reason.failed_vmentry))
+               vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
        if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
            is_nmi(vmx_get_intr_info(vcpu))) {
@@ -7258,6 +7266,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                kvm_after_interrupt(vcpu);
        }
 
+out:
        guest_state_exit_irqoff();
 }
 
@@ -7379,8 +7388,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
        loadsegment(es, __USER_DS);
 #endif
 
-       vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
-
        pt_guest_exit(vmx);
 
        kvm_load_host_xsave_state(vcpu);
@@ -7397,17 +7404,12 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                vmx->nested.nested_run_pending = 0;
        }
 
-       vmx->idt_vectoring_info = 0;
-
        if (unlikely(vmx->fail))
                return EXIT_FASTPATH_NONE;
 
        if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
                kvm_machine_check();
 
-       if (likely(!vmx->exit_reason.failed_vmentry))
-               vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
        trace_kvm_exit(vcpu, KVM_ISA_VMX);
 
        if (unlikely(vmx->exit_reason.failed_vmentry))
@@ -7751,8 +7753,16 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
-       vcpu->arch.xsaves_enabled = false;
+       /*
+        * XSAVES is effectively enabled if and only if XSAVE is also exposed
+        * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
+        * set if and only if XSAVE is supported.
+        */
+       if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+           guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+               kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
+
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
 
        vmx_setup_uret_msrs(vmx);
 
@@ -7760,7 +7770,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                vmcs_set_secondary_exec_control(vmx,
                                                vmx_secondary_exec_control(vmx));
 
-       if (nested_vmx_allowed(vcpu))
+       if (guest_can_use(vcpu, X86_FEATURE_VMX))
                vmx->msr_ia32_feature_control_valid_bits |=
                        FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
                        FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
@@ -7769,7 +7779,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                        ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
                          FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
 
-       if (nested_vmx_allowed(vcpu))
+       if (guest_can_use(vcpu, X86_FEATURE_VMX))
                nested_vmx_cr_fixed1_bits_update(vcpu);
 
        if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
@@ -8526,7 +8536,7 @@ static __init int hardware_setup(void)
         */
        vmx_setup_me_spte_mask();
 
-       kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
+       kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(),
                          ept_caps_to_lpage_level(vmx_capability.ept));
 
        /*
@@ -8622,10 +8632,8 @@ static void __vmx_exit(void)
 {
        allow_smaller_maxphyaddr = false;
 
-#ifdef CONFIG_KEXEC_CORE
-       RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
-       synchronize_rcu();
-#endif
+       cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
+
        vmx_cleanup_l1d_flush();
 }
 
@@ -8666,18 +8674,14 @@ static int __init vmx_init(void)
        if (r)
                goto err_l1d_flush;
 
-       vmx_setup_fb_clear_ctrl();
-
        for_each_possible_cpu(cpu) {
                INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
 
                pi_init_cpu(cpu);
        }
 
-#ifdef CONFIG_KEXEC_CORE
-       rcu_assign_pointer(crash_vmclear_loaded_vmcss,
-                          crash_vmclear_local_loaded_vmcss);
-#endif
+       cpu_emergency_register_virt_callback(vmx_emergency_disable);
+
        vmx_check_vmcs12_offsets();
 
        /*
index 32384ba..c2130d2 100644 (file)
@@ -374,7 +374,6 @@ struct kvm_vmx {
        u64 *pid_table;
 };
 
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
                        struct loaded_vmcs *buddy);
 int allocate_vpid(void);
@@ -562,7 +561,7 @@ static inline u8 vmx_get_rvi(void)
         SECONDARY_EXEC_APIC_REGISTER_VIRT |                            \
         SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |                         \
         SECONDARY_EXEC_SHADOW_VMCS |                                   \
-        SECONDARY_EXEC_XSAVES |                                        \
+        SECONDARY_EXEC_ENABLE_XSAVES |                                 \
         SECONDARY_EXEC_RDSEED_EXITING |                                \
         SECONDARY_EXEC_RDRAND_EXITING |                                \
         SECONDARY_EXEC_ENABLE_PML |                                    \
index c381770..6c9c81e 100644 (file)
@@ -25,6 +25,7 @@
 #include "tss.h"
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
+#include "mmu/page_track.h"
 #include "x86.h"
 #include "cpuid.h"
 #include "pmu.h"
@@ -237,6 +238,9 @@ EXPORT_SYMBOL_GPL(enable_apicv);
 u64 __read_mostly host_xss;
 EXPORT_SYMBOL_GPL(host_xss);
 
+u64 __read_mostly host_arch_capabilities;
+EXPORT_SYMBOL_GPL(host_arch_capabilities);
+
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
        KVM_GENERIC_VM_STATS(),
        STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@@ -1021,7 +1025,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
                if (vcpu->arch.xcr0 != host_xcr0)
                        xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
 
-               if (vcpu->arch.xsaves_enabled &&
+               if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
                    vcpu->arch.ia32_xss != host_xss)
                        wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
        }
@@ -1052,7 +1056,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
                if (vcpu->arch.xcr0 != host_xcr0)
                        xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
 
-               if (vcpu->arch.xsaves_enabled &&
+               if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
                    vcpu->arch.ia32_xss != host_xss)
                        wrmsrl(MSR_IA32_XSS, host_xss);
        }
@@ -1620,12 +1624,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
 
 static u64 kvm_get_arch_capabilities(void)
 {
-       u64 data = 0;
-
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
-               data &= KVM_SUPPORTED_ARCH_CAP;
-       }
+       u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
 
        /*
         * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
@@ -2631,7 +2630,7 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
        else
                vcpu->arch.tsc_offset = l1_offset;
 
-       static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
+       static_call(kvm_x86_write_tsc_offset)(vcpu);
 }
 
 static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
@@ -2647,8 +2646,7 @@ static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multipli
                vcpu->arch.tsc_scaling_ratio = l1_multiplier;
 
        if (kvm_caps.has_tsc_control)
-               static_call(kvm_x86_write_tsc_multiplier)(
-                       vcpu, vcpu->arch.tsc_scaling_ratio);
+               static_call(kvm_x86_write_tsc_multiplier)(vcpu);
 }
 
 static inline bool kvm_check_tsc_unstable(void)
@@ -4665,7 +4663,6 @@ static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
                return 0;
        default:
                return -ENXIO;
-               break;
        }
 }
 
@@ -6532,7 +6529,7 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
 static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
                              struct kvm_msr_filter_range *user_range)
 {
-       unsigned long *bitmap = NULL;
+       unsigned long *bitmap;
        size_t bitmap_size;
 
        if (!user_range->nmsrs)
@@ -8245,11 +8242,6 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
        return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
 }
 
-static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
-{
-       return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
-}
-
 static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
 {
        return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
@@ -8351,7 +8343,6 @@ static const struct x86_emulate_ops emulate_ops = {
        .fix_hypercall       = emulator_fix_hypercall,
        .intercept           = emulator_intercept,
        .get_cpuid           = emulator_get_cpuid,
-       .guest_has_long_mode = emulator_guest_has_long_mode,
        .guest_has_movbe     = emulator_guest_has_movbe,
        .guest_has_fxsr      = emulator_guest_has_fxsr,
        .guest_has_rdpid     = emulator_guest_has_rdpid,
@@ -9172,7 +9163,7 @@ static int kvmclock_cpu_down_prep(unsigned int cpu)
 static void tsc_khz_changed(void *data)
 {
        struct cpufreq_freqs *freq = data;
-       unsigned long khz = 0;
+       unsigned long khz;
 
        WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
 
@@ -9512,6 +9503,9 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 
        kvm_init_pmu_capability(ops->pmu_ops);
 
+       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
+
        r = ops->hardware_setup();
        if (r != 0)
                goto out_mmu_exit;
@@ -11111,12 +11105,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        goto out;
                }
+
                /*
-                * It should be impossible for the hypervisor timer to be in
-                * use before KVM has ever run the vCPU.
+                * Don't bother switching APIC timer emulation from the
+                * hypervisor timer to the software timer; the only way for the
+                * APIC timer to be active is if userspace stuffed vCPU state,
+                * i.e. put the vCPU into a nonsensical state.  Only an INIT
+                * will transition the vCPU out of UNINITIALIZED (without more
+                * state stuffing from userspace), which will reset the local
+                * APIC and thus cancel the timer or drop the IRQ (if the timer
+                * already expired).
                 */
-               WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
-
                kvm_vcpu_srcu_read_unlock(vcpu);
                kvm_vcpu_block(vcpu);
                kvm_vcpu_srcu_read_lock(vcpu);
@@ -11798,15 +11797,22 @@ static int sync_regs(struct kvm_vcpu *vcpu)
                __set_regs(vcpu, &vcpu->run->s.regs.regs);
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
        }
+
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
-               if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
+               struct kvm_sregs sregs = vcpu->run->s.regs.sregs;
+
+               if (__set_sregs(vcpu, &sregs))
                        return -EINVAL;
+
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
        }
+
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
-               if (kvm_vcpu_ioctl_x86_set_vcpu_events(
-                               vcpu, &vcpu->run->s.regs.events))
+               struct kvm_vcpu_events events = vcpu->run->s.regs.events;
+
+               if (kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events))
                        return -EINVAL;
+
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
        }
 
@@ -12627,6 +12633,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
 {
+       /*
+        * KVM doesn't support moving memslots when there are external page
+        * trackers attached to the VM, i.e. if KVMGT is in use.
+        */
+       if (change == KVM_MR_MOVE && kvm_page_track_has_external_user(kvm))
+               return -EINVAL;
+
        if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
                if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn())
                        return -EINVAL;
@@ -12772,7 +12785,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
                 * See is_writable_pte() for more details (the case involving
                 * access-tracked SPTEs is particularly relevant).
                 */
-               kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+               kvm_flush_remote_tlbs_memslot(kvm, new);
        }
 }
 
@@ -12781,6 +12794,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
+       if (change == KVM_MR_DELETE)
+               kvm_page_track_delete_slot(kvm, old);
+
        if (!kvm->arch.n_requested_mmu_pages &&
            (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
                unsigned long nr_mmu_pages;
@@ -12797,17 +12813,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                kvm_arch_free_memslot(kvm, old);
 }
 
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-       kvm_mmu_zap_all(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-                                  struct kvm_memory_slot *slot)
-{
-       kvm_page_track_flush_slot(kvm, slot);
-}
-
 static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
        return (is_guest_mode(vcpu) &&
index 82e3daf..1e7be1f 100644 (file)
@@ -323,6 +323,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
 extern u64 host_xcr0;
 extern u64 host_xss;
+extern u64 host_arch_capabilities;
 
 extern struct kvm_caps kvm_caps;
 
index a5488cc..7d79207 100644 (file)
@@ -71,6 +71,9 @@ config ARCH_HAS_ILOG2_U32
 config ARCH_HAS_ILOG2_U64
        def_bool n
 
+config ARCH_MTD_XIP
+       def_bool y
+
 config NO_IOPORT_MAP
        def_bool n
 
index 0e1bb6f..3f5ffae 100644 (file)
 #define XTENSA_STACK_ALIGNMENT 16
 #endif
 
+#ifndef XCHAL_HW_MIN_VERSION
+#if defined(XCHAL_HW_MIN_VERSION_MAJOR) && defined(XCHAL_HW_MIN_VERSION_MINOR)
+#define XCHAL_HW_MIN_VERSION (XCHAL_HW_MIN_VERSION_MAJOR * 100 + \
+                             XCHAL_HW_MIN_VERSION_MINOR)
+#else
+#define XCHAL_HW_MIN_VERSION 0
+#endif
+#endif
+
 #endif
diff --git a/arch/xtensa/include/asm/mtd-xip.h b/arch/xtensa/include/asm/mtd-xip.h
new file mode 100644 (file)
index 0000000..5143251
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_MTD_XIP_H
+#define _ASM_MTD_XIP_H
+
+#include <asm/processor.h>
+
+#define xip_irqpending()       (xtensa_get_sr(interrupt) & xtensa_get_sr(intenable))
+#define xip_currtime()         (xtensa_get_sr(ccount))
+#define xip_elapsed_since(x)   ((xtensa_get_sr(ccount) - (x)) / 1000) /* should work up to 1GHz */
+#define xip_cpu_idle()         do { asm volatile ("waiti 0"); } while (0)
+
+#endif /* _ASM_MTD_XIP_H */
+
index 3bc6b9a..e5da6d7 100644 (file)
@@ -34,6 +34,10 @@ extern char _SecondaryResetVector_text_start[];
 extern char _SecondaryResetVector_text_end[];
 #endif
 #ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_VECTORS_ADDR
+extern char _xip_text_start[];
+extern char _xip_text_end[];
+#endif
 extern char _xip_start[];
 extern char _xip_end[];
 #endif
index a0d05c8..1836180 100644 (file)
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 
+#include <asm/core.h>
 #include <asm/processor.h>
 #include <asm/stacktrace.h>
 
+#define XTENSA_HWVERSION_RG_2015_0     260000
+
+#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
+#define XTENSA_PMU_ERI_BASE            0x00101000
+#else
+#define XTENSA_PMU_ERI_BASE            0x00001000
+#endif
+
 /* Global control/status for all perf counters */
-#define XTENSA_PMU_PMG                 0x1000
+#define XTENSA_PMU_PMG                 XTENSA_PMU_ERI_BASE
 /* Perf counter values */
-#define XTENSA_PMU_PM(i)               (0x1080 + (i) * 4)
+#define XTENSA_PMU_PM(i)               (XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
 /* Perf counter control registers */
-#define XTENSA_PMU_PMCTRL(i)           (0x1100 + (i) * 4)
+#define XTENSA_PMU_PMCTRL(i)           (XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
 /* Perf counter status registers */
-#define XTENSA_PMU_PMSTAT(i)           (0x1180 + (i) * 4)
+#define XTENSA_PMU_PMSTAT(i)           (XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)
 
 #define XTENSA_PMU_PMG_PMEN            0x1
 
index aba3ff4..52d6e48 100644 (file)
@@ -311,6 +311,9 @@ void __init setup_arch(char **cmdline_p)
 
        mem_reserve(__pa(_stext), __pa(_end));
 #ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_VECTORS_ADDR
+       mem_reserve(__pa(_xip_text_start), __pa(_xip_text_end));
+#endif
        mem_reserve(__pa(_xip_start), __pa(_xip_end));
 #endif
 
index c14fd96..f47e9bb 100644 (file)
@@ -118,6 +118,7 @@ SECTIONS
     SECTION_VECTOR2 (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR)
 
     *(.exception.text)
+    *(.xiptext)
 #endif
 
     IRQENTRY_TEXT
@@ -201,6 +202,9 @@ SECTIONS
                   .DebugInterruptVector.text);
     RELOCATE_ENTRY(_exception_text,
                   .exception.text);
+#ifdef CONFIG_XIP_KERNEL
+    RELOCATE_ENTRY(_xip_text, .xiptext);
+#endif
 #endif
 #ifdef CONFIG_XIP_KERNEL
     RELOCATE_ENTRY(_xip_data, .data);
@@ -319,7 +323,12 @@ SECTIONS
                  LAST)
 #undef LAST
 #define LAST .exception.text
-
+  SECTION_VECTOR4 (_xip_text,
+                 .xiptext,
+                 ,
+                 LAST)
+#undef LAST
+#define LAST .xiptext
 #endif
   . = (LOADADDR(LAST) + SIZEOF(LAST) + 3) & ~ 3;
 
index 831bfd2..bdddef2 100644 (file)
@@ -118,8 +118,7 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size
        struct vpu_jsm_msg resp;
        int ret;
 
-       if (!strncpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN - 1))
-               return -ENOMEM;
+       strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN);
 
        ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp,
                                    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
index addba10..abb5911 100644 (file)
@@ -421,6 +421,8 @@ static const struct pci_device_id ahci_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */
        { PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */
        { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */
+       /* Elkhart Lake IDs 0x4b60 & 0x4b62 (https://sata-io.org/product/8803) are not tested yet */
+       { PCI_VDEVICE(INTEL, 0x4b63), board_ahci_low_power }, /* Elkhart Lake AHCI */
 
        /* JMicron 360/1/3/5/6, match class to avoid IDE function */
        { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
@@ -807,7 +809,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class,
 static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
                              unsigned long deadline)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        struct ata_port *ap = link->ap;
        struct ahci_port_priv *pp = ap->private_data;
        struct ahci_host_priv *hpriv = ap->host->private_data;
index c2b6be0..64f7f7d 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/libata.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include "ahci.h"
index 9604a2f..ed263de 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/log2.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/regmap.h>
index 5083fb6..adc851c 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/libata.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/regmap.h>
index 7645015..f318735 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/mbus.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include "ahci.h"
 
index e89807f..9accf89 100644 (file)
@@ -31,13 +31,11 @@ static int ahci_octeon_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct device_node *node = dev->of_node;
-       struct resource *res;
        void __iomem *base;
        u64 cfg;
        int ret;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       base = devm_ioremap_resource(&pdev->dev, res);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base))
                return PTR_ERR(base);
 
index 3d01b11..b1a4e57 100644 (file)
@@ -12,9 +12,7 @@
 #include <linux/pm.h>
 #include <linux/ahci_platform.h>
 #include <linux/device.h>
-#include <linux/of_address.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/libata.h>
 #include "ahci.h"
@@ -90,7 +88,7 @@ MODULE_DEVICE_TABLE(acpi, ahci_qoriq_acpi_match);
 static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class,
                          unsigned long deadline)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        void __iomem *port_mmio = ahci_port_base(link->ap);
        u32 px_cmd, px_is, px_val;
        struct ata_port *ap = link->ap;
index 2c32d58..59f97aa 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/device.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/libata.h>
 #include <linux/ahci_platform.h>
@@ -132,8 +131,7 @@ static const struct ata_port_info *ahci_seattle_get_port_info(
        if (!plat_data)
                return &ahci_port_info;
 
-       plat_data->sgpio_ctrl = devm_ioremap_resource(dev,
-                             platform_get_resource(pdev, IORESOURCE_MEM, 1));
+       plat_data->sgpio_ctrl = devm_platform_ioremap_resource(pdev, 1);
        if (IS_ERR(plat_data->sgpio_ctrl))
                return &ahci_port_info;
 
index 04531fa..58b2683 100644 (file)
@@ -13,8 +13,8 @@
 #include <linux/clk.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include "ahci.h"
index 21c2079..8703c2a 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
@@ -530,8 +530,7 @@ static int tegra_ahci_probe(struct platform_device *pdev)
        tegra->pdev = pdev;
        tegra->soc = of_device_get_match_data(&pdev->dev);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       tegra->sata_regs = devm_ioremap_resource(&pdev->dev, res);
+       tegra->sata_regs = devm_platform_ioremap_resource(pdev, 1);
        if (IS_ERR(tegra->sata_regs))
                return PTR_ERR(tegra->sata_regs);
 
index eb773f2..ccef5e6 100644 (file)
@@ -110,9 +110,8 @@ static int xgene_ahci_init_memram(struct xgene_ahci_context *ctx)
  * @timeout : timeout for achieving the value.
  */
 static int xgene_ahci_poll_reg_val(struct ata_port *ap,
-                                  void __iomem *reg, unsigned
-                                  int val, unsigned long interval,
-                                  unsigned long timeout)
+                                  void __iomem *reg, unsigned int val,
+                                  unsigned int interval, unsigned int timeout)
 {
        unsigned long deadline;
        unsigned int tmp;
@@ -350,7 +349,7 @@ static void xgene_ahci_set_phy_cfg(struct xgene_ahci_context *ctx, int channel)
 static int xgene_ahci_do_hardreset(struct ata_link *link,
                                   unsigned long deadline, bool *online)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        struct ata_port *ap = link->ap;
        struct ahci_host_priv *hpriv = ap->host->private_data;
        struct xgene_ahci_context *ctx = hpriv->plat_data;
@@ -755,20 +754,17 @@ static int xgene_ahci_probe(struct platform_device *pdev)
        ctx->dev = dev;
 
        /* Retrieve the IP core resource */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       ctx->csr_core = devm_ioremap_resource(dev, res);
+       ctx->csr_core = devm_platform_ioremap_resource(pdev, 1);
        if (IS_ERR(ctx->csr_core))
                return PTR_ERR(ctx->csr_core);
 
        /* Retrieve the IP diagnostic resource */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-       ctx->csr_diag = devm_ioremap_resource(dev, res);
+       ctx->csr_diag = devm_platform_ioremap_resource(pdev, 2);
        if (IS_ERR(ctx->csr_diag))
                return PTR_ERR(ctx->csr_diag);
 
        /* Retrieve the IP AXI resource */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
-       ctx->csr_axi = devm_ioremap_resource(dev, res);
+       ctx->csr_axi = devm_platform_ioremap_resource(pdev, 3);
        if (IS_ERR(ctx->csr_axi))
                return PTR_ERR(ctx->csr_axi);
 
index 06aec35..e2baced 100644 (file)
@@ -1403,7 +1403,7 @@ EXPORT_SYMBOL_GPL(ahci_kick_engine);
 
 static int ahci_exec_polled_cmd(struct ata_port *ap, int pmp,
                                struct ata_taskfile *tf, int is_cmd, u16 flags,
-                               unsigned long timeout_msec)
+                               unsigned int timeout_msec)
 {
        const u32 cmd_fis_len = 5; /* five dwords */
        struct ahci_port_priv *pp = ap->private_data;
@@ -1448,7 +1448,8 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
        struct ahci_host_priv *hpriv = ap->host->private_data;
        struct ahci_port_priv *pp = ap->private_data;
        const char *reason = NULL;
-       unsigned long now, msecs;
+       unsigned long now;
+       unsigned int msecs;
        struct ata_taskfile tf;
        bool fbs_disabled = false;
        int rc;
@@ -1587,7 +1588,7 @@ static int ahci_pmp_retry_softreset(struct ata_link *link, unsigned int *class,
 int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
                      unsigned long deadline, bool *online)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        struct ata_port *ap = link->ap;
        struct ahci_port_priv *pp = ap->private_data;
        struct ahci_host_priv *hpriv = ap->host->private_data;
index 9a8d43f..581704e 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/ahci_platform.h>
 #include <linux/phy/phy.h>
 #include <linux/pm_runtime.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/reset.h>
 #include "ahci.h"
index 04db0f2..7431431 100644 (file)
@@ -1586,13 +1586,11 @@ static unsigned ata_exec_internal_sg(struct ata_device *dev,
                }
        }
 
-       if (ap->ops->error_handler)
-               ata_eh_release(ap);
+       ata_eh_release(ap);
 
        rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout));
 
-       if (ap->ops->error_handler)
-               ata_eh_acquire(ap);
+       ata_eh_acquire(ap);
 
        ata_sff_flush_pio_task(ap);
 
@@ -1607,10 +1605,7 @@ static unsigned ata_exec_internal_sg(struct ata_device *dev,
                if (qc->flags & ATA_QCFLAG_ACTIVE) {
                        qc->err_mask |= AC_ERR_TIMEOUT;
 
-                       if (ap->ops->error_handler)
-                               ata_port_freeze(ap);
-                       else
-                               ata_qc_complete(qc);
+                       ata_port_freeze(ap);
 
                        ata_dev_warn(dev, "qc timeout after %u msecs (cmd 0x%x)\n",
                                     timeout, command);
@@ -3063,144 +3058,6 @@ int ata_cable_sata(struct ata_port *ap)
 EXPORT_SYMBOL_GPL(ata_cable_sata);
 
 /**
- *     ata_bus_probe - Reset and probe ATA bus
- *     @ap: Bus to probe
- *
- *     Master ATA bus probing function.  Initiates a hardware-dependent
- *     bus reset, then attempts to identify any devices found on
- *     the bus.
- *
- *     LOCKING:
- *     PCI/etc. bus probe sem.
- *
- *     RETURNS:
- *     Zero on success, negative errno otherwise.
- */
-
-int ata_bus_probe(struct ata_port *ap)
-{
-       unsigned int classes[ATA_MAX_DEVICES];
-       int tries[ATA_MAX_DEVICES];
-       int rc;
-       struct ata_device *dev;
-
-       ata_for_each_dev(dev, &ap->link, ALL)
-               tries[dev->devno] = ATA_PROBE_MAX_TRIES;
-
- retry:
-       ata_for_each_dev(dev, &ap->link, ALL) {
-               /* If we issue an SRST then an ATA drive (not ATAPI)
-                * may change configuration and be in PIO0 timing. If
-                * we do a hard reset (or are coming from power on)
-                * this is true for ATA or ATAPI. Until we've set a
-                * suitable controller mode we should not touch the
-                * bus as we may be talking too fast.
-                */
-               dev->pio_mode = XFER_PIO_0;
-               dev->dma_mode = 0xff;
-
-               /* If the controller has a pio mode setup function
-                * then use it to set the chipset to rights. Don't
-                * touch the DMA setup as that will be dealt with when
-                * configuring devices.
-                */
-               if (ap->ops->set_piomode)
-                       ap->ops->set_piomode(ap, dev);
-       }
-
-       /* reset and determine device classes */
-       ap->ops->phy_reset(ap);
-
-       ata_for_each_dev(dev, &ap->link, ALL) {
-               if (dev->class != ATA_DEV_UNKNOWN)
-                       classes[dev->devno] = dev->class;
-               else
-                       classes[dev->devno] = ATA_DEV_NONE;
-
-               dev->class = ATA_DEV_UNKNOWN;
-       }
-
-       /* read IDENTIFY page and configure devices. We have to do the identify
-          specific sequence bass-ackwards so that PDIAG- is released by
-          the slave device */
-
-       ata_for_each_dev(dev, &ap->link, ALL_REVERSE) {
-               if (tries[dev->devno])
-                       dev->class = classes[dev->devno];
-
-               if (!ata_dev_enabled(dev))
-                       continue;
-
-               rc = ata_dev_read_id(dev, &dev->class, ATA_READID_POSTRESET,
-                                    dev->id);
-               if (rc)
-                       goto fail;
-       }
-
-       /* Now ask for the cable type as PDIAG- should have been released */
-       if (ap->ops->cable_detect)
-               ap->cbl = ap->ops->cable_detect(ap);
-
-       /* We may have SATA bridge glue hiding here irrespective of
-        * the reported cable types and sensed types.  When SATA
-        * drives indicate we have a bridge, we don't know which end
-        * of the link the bridge is which is a problem.
-        */
-       ata_for_each_dev(dev, &ap->link, ENABLED)
-               if (ata_id_is_sata(dev->id))
-                       ap->cbl = ATA_CBL_SATA;
-
-       /* After the identify sequence we can now set up the devices. We do
-          this in the normal order so that the user doesn't get confused */
-
-       ata_for_each_dev(dev, &ap->link, ENABLED) {
-               ap->link.eh_context.i.flags |= ATA_EHI_PRINTINFO;
-               rc = ata_dev_configure(dev);
-               ap->link.eh_context.i.flags &= ~ATA_EHI_PRINTINFO;
-               if (rc)
-                       goto fail;
-       }
-
-       /* configure transfer mode */
-       rc = ata_set_mode(&ap->link, &dev);
-       if (rc)
-               goto fail;
-
-       ata_for_each_dev(dev, &ap->link, ENABLED)
-               return 0;
-
-       return -ENODEV;
-
- fail:
-       tries[dev->devno]--;
-
-       switch (rc) {
-       case -EINVAL:
-               /* eeek, something went very wrong, give up */
-               tries[dev->devno] = 0;
-               break;
-
-       case -ENODEV:
-               /* give it just one more chance */
-               tries[dev->devno] = min(tries[dev->devno], 1);
-               fallthrough;
-       case -EIO:
-               if (tries[dev->devno] == 1) {
-                       /* This is the last chance, better to slow
-                        * down than lose it.
-                        */
-                       sata_down_spd_limit(&ap->link, 0);
-                       ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
-               }
-       }
-
-       if (!tries[dev->devno])
-               ata_dev_disable(dev);
-
-       goto retry;
-}
-
-/**
  *     sata_print_link_status - Print SATA link status
  *     @link: SATA link to printk link status about
  *
@@ -3782,7 +3639,7 @@ int ata_std_prereset(struct ata_link *link, unsigned long deadline)
 {
        struct ata_port *ap = link->ap;
        struct ata_eh_context *ehc = &link->eh_context;
-       const unsigned long *timing = sata_ehc_deb_timing(ehc);
+       const unsigned int *timing = sata_ehc_deb_timing(ehc);
        int rc;
 
        /* if we're about to do hardreset, nothing more to do */
@@ -3824,7 +3681,7 @@ EXPORT_SYMBOL_GPL(ata_std_prereset);
 int sata_std_hardreset(struct ata_link *link, unsigned int *class,
                       unsigned long deadline)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        bool online;
        int rc;
 
@@ -4213,10 +4070,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
                                                ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Micron_M500_*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM },
-       { "Crucial_CT*M500*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Micron_M5[15]0_*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "Micron_1100_*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Crucial_CT*M500*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*M550*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*MX100*",          "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
@@ -4874,126 +4733,103 @@ static void ata_verify_xfer(struct ata_queued_cmd *qc)
 void ata_qc_complete(struct ata_queued_cmd *qc)
 {
        struct ata_port *ap = qc->ap;
+       struct ata_device *dev = qc->dev;
+       struct ata_eh_info *ehi = &dev->link->eh_info;
 
        /* Trigger the LED (if available) */
        ledtrig_disk_activity(!!(qc->tf.flags & ATA_TFLAG_WRITE));
 
-       /* XXX: New EH and old EH use different mechanisms to
-        * synchronize EH with regular execution path.
-        *
-        * In new EH, a qc owned by EH is marked with ATA_QCFLAG_EH.
-        * Normal execution path is responsible for not accessing a
-        * qc owned by EH.  libata core enforces the rule by returning NULL
-        * from ata_qc_from_tag() for qcs owned by EH.
+       /*
+        * In order to synchronize EH with the regular execution path, a qc that
+        * is owned by EH is marked with ATA_QCFLAG_EH.
         *
-        * Old EH depends on ata_qc_complete() nullifying completion
-        * requests if ATA_QCFLAG_EH_SCHEDULED is set.  Old EH does
-        * not synchronize with interrupt handler.  Only PIO task is
-        * taken care of.
+        * The normal execution path is responsible for not accessing a qc owned
+        * by EH.  libata core enforces the rule by returning NULL from
+        * ata_qc_from_tag() for qcs owned by EH.
         */
-       if (ap->ops->error_handler) {
-               struct ata_device *dev = qc->dev;
-               struct ata_eh_info *ehi = &dev->link->eh_info;
-
-               if (unlikely(qc->err_mask))
-                       qc->flags |= ATA_QCFLAG_EH;
+       if (unlikely(qc->err_mask))
+               qc->flags |= ATA_QCFLAG_EH;
 
-               /*
-                * Finish internal commands without any further processing
-                * and always with the result TF filled.
-                */
-               if (unlikely(ata_tag_internal(qc->tag))) {
-                       fill_result_tf(qc);
-                       trace_ata_qc_complete_internal(qc);
-                       __ata_qc_complete(qc);
-                       return;
-               }
+       /*
+        * Finish internal commands without any further processing and always
+        * with the result TF filled.
+        */
+       if (unlikely(ata_tag_internal(qc->tag))) {
+               fill_result_tf(qc);
+               trace_ata_qc_complete_internal(qc);
+               __ata_qc_complete(qc);
+               return;
+       }
 
-               /*
-                * Non-internal qc has failed.  Fill the result TF and
-                * summon EH.
-                */
-               if (unlikely(qc->flags & ATA_QCFLAG_EH)) {
-                       fill_result_tf(qc);
-                       trace_ata_qc_complete_failed(qc);
-                       ata_qc_schedule_eh(qc);
-                       return;
-               }
+       /* Non-internal qc has failed.  Fill the result TF and summon EH. */
+       if (unlikely(qc->flags & ATA_QCFLAG_EH)) {
+               fill_result_tf(qc);
+               trace_ata_qc_complete_failed(qc);
+               ata_qc_schedule_eh(qc);
+               return;
+       }
 
-               WARN_ON_ONCE(ata_port_is_frozen(ap));
+       WARN_ON_ONCE(ata_port_is_frozen(ap));
 
-               /* read result TF if requested */
-               if (qc->flags & ATA_QCFLAG_RESULT_TF)
-                       fill_result_tf(qc);
+       /* read result TF if requested */
+       if (qc->flags & ATA_QCFLAG_RESULT_TF)
+               fill_result_tf(qc);
 
-               trace_ata_qc_complete_done(qc);
+       trace_ata_qc_complete_done(qc);
 
+       /*
+        * For CDL commands that completed without an error, check if we have
+        * sense data (ATA_SENSE is set). If we do, then the command may have
+        * been aborted by the device due to a limit timeout using the policy
+        * 0xD. For these commands, invoke EH to get the command sense data.
+        */
+       if (qc->result_tf.status & ATA_SENSE &&
+           ((ata_is_ncq(qc->tf.protocol) &&
+             dev->flags & ATA_DFLAG_CDL_ENABLED) ||
+            (!ata_is_ncq(qc->tf.protocol) &&
+             ata_id_sense_reporting_enabled(dev->id)))) {
                /*
-                * For CDL commands that completed without an error, check if
-                * we have sense data (ATA_SENSE is set). If we do, then the
-                * command may have been aborted by the device due to a limit
-                * timeout using the policy 0xD. For these commands, invoke EH
-                * to get the command sense data.
+                * Tell SCSI EH to not overwrite scmd->result even if this
+                * command is finished with result SAM_STAT_GOOD.
                 */
-               if (qc->result_tf.status & ATA_SENSE &&
-                   ((ata_is_ncq(qc->tf.protocol) &&
-                     dev->flags & ATA_DFLAG_CDL_ENABLED) ||
-                    (!ata_is_ncq(qc->tf.protocol) &&
-                     ata_id_sense_reporting_enabled(dev->id)))) {
-                       /*
-                        * Tell SCSI EH to not overwrite scmd->result even if
-                        * this command is finished with result SAM_STAT_GOOD.
-                        */
-                       qc->scsicmd->flags |= SCMD_FORCE_EH_SUCCESS;
-                       qc->flags |= ATA_QCFLAG_EH_SUCCESS_CMD;
-                       ehi->dev_action[dev->devno] |= ATA_EH_GET_SUCCESS_SENSE;
+               qc->scsicmd->flags |= SCMD_FORCE_EH_SUCCESS;
+               qc->flags |= ATA_QCFLAG_EH_SUCCESS_CMD;
+               ehi->dev_action[dev->devno] |= ATA_EH_GET_SUCCESS_SENSE;
 
-                       /*
-                        * set pending so that ata_qc_schedule_eh() does not
-                        * trigger fast drain, and freeze the port.
-                        */
-                       ap->pflags |= ATA_PFLAG_EH_PENDING;
-                       ata_qc_schedule_eh(qc);
-                       return;
-               }
-
-               /* Some commands need post-processing after successful
-                * completion.
+               /*
+                * set pending so that ata_qc_schedule_eh() does not trigger
+                * fast drain, and freeze the port.
                 */
-               switch (qc->tf.command) {
-               case ATA_CMD_SET_FEATURES:
-                       if (qc->tf.feature != SETFEATURES_WC_ON &&
-                           qc->tf.feature != SETFEATURES_WC_OFF &&
-                           qc->tf.feature != SETFEATURES_RA_ON &&
-                           qc->tf.feature != SETFEATURES_RA_OFF)
-                               break;
-                       fallthrough;
-               case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */
-               case ATA_CMD_SET_MULTI: /* multi_count changed */
-                       /* revalidate device */
-                       ehi->dev_action[dev->devno] |= ATA_EH_REVALIDATE;
-                       ata_port_schedule_eh(ap);
-                       break;
+               ap->pflags |= ATA_PFLAG_EH_PENDING;
+               ata_qc_schedule_eh(qc);
+               return;
+       }
 
-               case ATA_CMD_SLEEP:
-                       dev->flags |= ATA_DFLAG_SLEEPING;
+       /* Some commands need post-processing after successful completion. */
+       switch (qc->tf.command) {
+       case ATA_CMD_SET_FEATURES:
+               if (qc->tf.feature != SETFEATURES_WC_ON &&
+                   qc->tf.feature != SETFEATURES_WC_OFF &&
+                   qc->tf.feature != SETFEATURES_RA_ON &&
+                   qc->tf.feature != SETFEATURES_RA_OFF)
                        break;
-               }
-
-               if (unlikely(dev->flags & ATA_DFLAG_DUBIOUS_XFER))
-                       ata_verify_xfer(qc);
+               fallthrough;
+       case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */
+       case ATA_CMD_SET_MULTI: /* multi_count changed */
+               /* revalidate device */
+               ehi->dev_action[dev->devno] |= ATA_EH_REVALIDATE;
+               ata_port_schedule_eh(ap);
+               break;
 
-               __ata_qc_complete(qc);
-       } else {
-               if (qc->flags & ATA_QCFLAG_EH_SCHEDULED)
-                       return;
+       case ATA_CMD_SLEEP:
+               dev->flags |= ATA_DFLAG_SLEEPING;
+               break;
+       }
 
-               /* read result TF if failed or requested */
-               if (qc->err_mask || qc->flags & ATA_QCFLAG_RESULT_TF)
-                       fill_result_tf(qc);
+       if (unlikely(dev->flags & ATA_DFLAG_DUBIOUS_XFER))
+               ata_verify_xfer(qc);
 
-               __ata_qc_complete(qc);
-       }
+       __ata_qc_complete(qc);
 }
 EXPORT_SYMBOL_GPL(ata_qc_complete);
 
@@ -5039,11 +4875,8 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
        struct ata_link *link = qc->dev->link;
        u8 prot = qc->tf.protocol;
 
-       /* Make sure only one non-NCQ command is outstanding.  The
-        * check is skipped for old EH because it reuses active qc to
-        * request ATAPI sense.
-        */
-       WARN_ON_ONCE(ap->ops->error_handler && ata_tag_valid(link->active_tag));
+       /* Make sure only one non-NCQ command is outstanding. */
+       WARN_ON_ONCE(ata_tag_valid(link->active_tag));
 
        if (ata_is_ncq(prot)) {
                WARN_ON_ONCE(link->sactive & (1 << qc->hw_tag));
@@ -5896,7 +5729,7 @@ void ata_host_init(struct ata_host *host, struct device *dev,
 }
 EXPORT_SYMBOL_GPL(ata_host_init);
 
-void __ata_port_probe(struct ata_port *ap)
+void ata_port_probe(struct ata_port *ap)
 {
        struct ata_eh_info *ehi = &ap->link.eh_info;
        unsigned long flags;
@@ -5914,20 +5747,7 @@ void __ata_port_probe(struct ata_port *ap)
 
        spin_unlock_irqrestore(ap->lock, flags);
 }
-
-int ata_port_probe(struct ata_port *ap)
-{
-       int rc = 0;
-
-       if (ap->ops->error_handler) {
-               __ata_port_probe(ap);
-               ata_port_wait_eh(ap);
-       } else {
-               rc = ata_bus_probe(ap);
-       }
-       return rc;
-}
-
+EXPORT_SYMBOL_GPL(ata_port_probe);
 
 static void async_port_probe(void *data, async_cookie_t cookie)
 {
@@ -5943,7 +5763,8 @@ static void async_port_probe(void *data, async_cookie_t cookie)
        if (!(ap->host->flags & ATA_HOST_PARALLEL_SCAN) && ap->port_no != 0)
                async_synchronize_cookie(cookie);
 
-       (void)ata_port_probe(ap);
+       ata_port_probe(ap);
+       ata_port_wait_eh(ap);
 
        /* in order to keep device order, we need to synchronize at this point */
        async_synchronize_cookie(cookie);
@@ -6130,9 +5951,6 @@ static void ata_port_detach(struct ata_port *ap)
        struct ata_link *link;
        struct ata_device *dev;
 
-       if (!ap->ops->error_handler)
-               goto skip_eh;
-
        /* tell EH we're leaving & flush EH */
        spin_lock_irqsave(ap->lock, flags);
        ap->pflags |= ATA_PFLAG_UNLOADING;
@@ -6148,7 +5966,6 @@ static void ata_port_detach(struct ata_port *ap)
        cancel_delayed_work_sync(&ap->hotplug_task);
        cancel_delayed_work_sync(&ap->scsi_rescan_task);
 
- skip_eh:
        /* clean up zpodd on port removal */
        ata_for_each_link(link, ap, HOST_FIRST) {
                ata_for_each_dev(dev, link, ALL) {
@@ -6684,7 +6501,7 @@ EXPORT_SYMBOL_GPL(ata_msleep);
  *     The final register value.
  */
 u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val,
-                     unsigned long interval, unsigned long timeout)
+                     unsigned int interval, unsigned int timeout)
 {
        unsigned long deadline;
        u32 tmp;
index 35e0367..159ba6b 100644 (file)
@@ -78,12 +78,12 @@ enum {
  * are mostly for error handling, hotplug and those outlier devices that
  * take an exceptionally long time to recover from reset.
  */
-static const unsigned long ata_eh_reset_timeouts[] = {
+static const unsigned int ata_eh_reset_timeouts[] = {
        10000,  /* most drives spin up by 10sec */
        10000,  /* > 99% working drives spin up before 20sec */
        35000,  /* give > 30 secs of idleness for outlier devices */
         5000,  /* and sweet one last chance */
-       ULONG_MAX, /* > 1 min has elapsed, give up */
+       UINT_MAX, /* > 1 min has elapsed, give up */
 };
 
 static const unsigned int ata_eh_identify_timeouts[] = {
@@ -571,13 +571,10 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
        /* make sure sff pio task is not running */
        ata_sff_flush_pio_task(ap);
 
-       if (!ap->ops->error_handler)
-               return;
-
        /* synchronize with host lock and sort out timeouts */
 
        /*
-        * For new EH, all qcs are finished in one of three ways -
+        * For EH, all qcs are finished in one of three ways -
         * normal completion, error completion, and SCSI timeout.
         * Both completions can race against SCSI timeout.  When normal
         * completion wins, the qc never reaches EH.  When error
@@ -659,94 +656,87 @@ EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
 void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
 {
        unsigned long flags;
+       struct ata_link *link;
 
-       /* invoke error handler */
-       if (ap->ops->error_handler) {
-               struct ata_link *link;
-
-               /* acquire EH ownership */
-               ata_eh_acquire(ap);
+       /* acquire EH ownership */
+       ata_eh_acquire(ap);
  repeat:
-               /* kill fast drain timer */
-               del_timer_sync(&ap->fastdrain_timer);
+       /* kill fast drain timer */
+       del_timer_sync(&ap->fastdrain_timer);
 
-               /* process port resume request */
-               ata_eh_handle_port_resume(ap);
+       /* process port resume request */
+       ata_eh_handle_port_resume(ap);
 
-               /* fetch & clear EH info */
-               spin_lock_irqsave(ap->lock, flags);
+       /* fetch & clear EH info */
+       spin_lock_irqsave(ap->lock, flags);
 
-               ata_for_each_link(link, ap, HOST_FIRST) {
-                       struct ata_eh_context *ehc = &link->eh_context;
-                       struct ata_device *dev;
+       ata_for_each_link(link, ap, HOST_FIRST) {
+               struct ata_eh_context *ehc = &link->eh_context;
+               struct ata_device *dev;
 
-                       memset(&link->eh_context, 0, sizeof(link->eh_context));
-                       link->eh_context.i = link->eh_info;
-                       memset(&link->eh_info, 0, sizeof(link->eh_info));
+               memset(&link->eh_context, 0, sizeof(link->eh_context));
+               link->eh_context.i = link->eh_info;
+               memset(&link->eh_info, 0, sizeof(link->eh_info));
 
-                       ata_for_each_dev(dev, link, ENABLED) {
-                               int devno = dev->devno;
+               ata_for_each_dev(dev, link, ENABLED) {
+                       int devno = dev->devno;
 
-                               ehc->saved_xfer_mode[devno] = dev->xfer_mode;
-                               if (ata_ncq_enabled(dev))
-                                       ehc->saved_ncq_enabled |= 1 << devno;
-                       }
+                       ehc->saved_xfer_mode[devno] = dev->xfer_mode;
+                       if (ata_ncq_enabled(dev))
+                               ehc->saved_ncq_enabled |= 1 << devno;
                }
+       }
 
-               ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
-               ap->pflags &= ~ATA_PFLAG_EH_PENDING;
-               ap->excl_link = NULL;   /* don't maintain exclusion over EH */
+       ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
+       ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+       ap->excl_link = NULL;   /* don't maintain exclusion over EH */
 
-               spin_unlock_irqrestore(ap->lock, flags);
+       spin_unlock_irqrestore(ap->lock, flags);
 
-               /* invoke EH, skip if unloading or suspended */
-               if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
-                       ap->ops->error_handler(ap);
-               else {
-                       /* if unloading, commence suicide */
-                       if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
-                           !(ap->pflags & ATA_PFLAG_UNLOADED))
-                               ata_eh_unload(ap);
-                       ata_eh_finish(ap);
-               }
+       /* invoke EH, skip if unloading or suspended */
+       if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
+               ap->ops->error_handler(ap);
+       else {
+               /* if unloading, commence suicide */
+               if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
+                   !(ap->pflags & ATA_PFLAG_UNLOADED))
+                       ata_eh_unload(ap);
+               ata_eh_finish(ap);
+       }
 
-               /* process port suspend request */
-               ata_eh_handle_port_suspend(ap);
+       /* process port suspend request */
+       ata_eh_handle_port_suspend(ap);
 
-               /* Exception might have happened after ->error_handler
-                * recovered the port but before this point.  Repeat
-                * EH in such case.
-                */
-               spin_lock_irqsave(ap->lock, flags);
+       /*
+        * Exception might have happened after ->error_handler recovered the
+        * port but before this point.  Repeat EH in such case.
+        */
+       spin_lock_irqsave(ap->lock, flags);
 
-               if (ap->pflags & ATA_PFLAG_EH_PENDING) {
-                       if (--ap->eh_tries) {
-                               spin_unlock_irqrestore(ap->lock, flags);
-                               goto repeat;
-                       }
-                       ata_port_err(ap,
-                                    "EH pending after %d tries, giving up\n",
-                                    ATA_EH_MAX_TRIES);
-                       ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+       if (ap->pflags & ATA_PFLAG_EH_PENDING) {
+               if (--ap->eh_tries) {
+                       spin_unlock_irqrestore(ap->lock, flags);
+                       goto repeat;
                }
+               ata_port_err(ap,
+                            "EH pending after %d tries, giving up\n",
+                            ATA_EH_MAX_TRIES);
+               ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+       }
 
-               /* this run is complete, make sure EH info is clear */
-               ata_for_each_link(link, ap, HOST_FIRST)
-                       memset(&link->eh_info, 0, sizeof(link->eh_info));
+       /* this run is complete, make sure EH info is clear */
+       ata_for_each_link(link, ap, HOST_FIRST)
+               memset(&link->eh_info, 0, sizeof(link->eh_info));
 
-               /* end eh (clear host_eh_scheduled) while holding
-                * ap->lock such that if exception occurs after this
-                * point but before EH completion, SCSI midlayer will
-                * re-initiate EH.
-                */
-               ap->ops->end_eh(ap);
+       /*
+        * end eh (clear host_eh_scheduled) while holding ap->lock such that if
+        * exception occurs after this point but before EH completion, SCSI
+        * midlayer will re-initiate EH.
+        */
+       ap->ops->end_eh(ap);
 
-               spin_unlock_irqrestore(ap->lock, flags);
-               ata_eh_release(ap);
-       } else {
-               WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
-               ap->ops->eng_timeout(ap);
-       }
+       spin_unlock_irqrestore(ap->lock, flags);
+       ata_eh_release(ap);
 
        scsi_eh_flush_done_q(&ap->eh_done_q);
 
@@ -912,8 +902,6 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
 {
        struct ata_port *ap = qc->ap;
 
-       WARN_ON(!ap->ops->error_handler);
-
        qc->flags |= ATA_QCFLAG_EH;
        ata_eh_set_pending(ap, 1);
 
@@ -934,8 +922,6 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
  */
 void ata_std_sched_eh(struct ata_port *ap)
 {
-       WARN_ON(!ap->ops->error_handler);
-
        if (ap->pflags & ATA_PFLAG_INITIALIZING)
                return;
 
@@ -989,8 +975,6 @@ static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
        struct ata_queued_cmd *qc;
        int tag, nr_aborted = 0;
 
-       WARN_ON(!ap->ops->error_handler);
-
        /* we're gonna abort all commands, no need for fast drain */
        ata_eh_set_pending(ap, 0);
 
@@ -1065,8 +1049,6 @@ EXPORT_SYMBOL_GPL(ata_port_abort);
  */
 static void __ata_port_freeze(struct ata_port *ap)
 {
-       WARN_ON(!ap->ops->error_handler);
-
        if (ap->ops->freeze)
                ap->ops->freeze(ap);
 
@@ -1091,8 +1073,6 @@ static void __ata_port_freeze(struct ata_port *ap)
  */
 int ata_port_freeze(struct ata_port *ap)
 {
-       WARN_ON(!ap->ops->error_handler);
-
        __ata_port_freeze(ap);
 
        return ata_port_abort(ap);
@@ -1112,9 +1092,6 @@ void ata_eh_freeze_port(struct ata_port *ap)
 {
        unsigned long flags;
 
-       if (!ap->ops->error_handler)
-               return;
-
        spin_lock_irqsave(ap->lock, flags);
        __ata_port_freeze(ap);
        spin_unlock_irqrestore(ap->lock, flags);
@@ -1134,9 +1111,6 @@ void ata_eh_thaw_port(struct ata_port *ap)
 {
        unsigned long flags;
 
-       if (!ap->ops->error_handler)
-               return;
-
        spin_lock_irqsave(ap->lock, flags);
 
        ap->pflags &= ~ATA_PFLAG_FROZEN;
@@ -2575,7 +2549,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
        /*
         * Prepare to reset
         */
-       while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
+       while (ata_eh_reset_timeouts[max_tries] != UINT_MAX)
                max_tries++;
        if (link->flags & ATA_LFLAG_RST_ONCE)
                max_tries = 1;
index 85e279a..5d31c08 100644 (file)
 #include "libata-transport.h"
 
 /* debounce timing parameters in msecs { interval, duration, timeout } */
-const unsigned long sata_deb_timing_normal[]           = {   5,  100, 2000 };
+const unsigned int sata_deb_timing_normal[]            = {   5,  100, 2000 };
 EXPORT_SYMBOL_GPL(sata_deb_timing_normal);
-const unsigned long sata_deb_timing_hotplug[]          = {  25,  500, 2000 };
+const unsigned int sata_deb_timing_hotplug[]           = {  25,  500, 2000 };
 EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug);
-const unsigned long sata_deb_timing_long[]             = { 100, 2000, 5000 };
+const unsigned int sata_deb_timing_long[]              = { 100, 2000, 5000 };
 EXPORT_SYMBOL_GPL(sata_deb_timing_long);
 
 /**
@@ -232,11 +232,11 @@ EXPORT_SYMBOL_GPL(ata_tf_from_fis);
  *     RETURNS:
  *     0 on success, -errno on failure.
  */
-int sata_link_debounce(struct ata_link *link, const unsigned long *params,
+int sata_link_debounce(struct ata_link *link, const unsigned int *params,
                       unsigned long deadline)
 {
-       unsigned long interval = params[0];
-       unsigned long duration = params[1];
+       unsigned int interval = params[0];
+       unsigned int duration = params[1];
        unsigned long last_jiffies, t;
        u32 last, cur;
        int rc;
@@ -295,7 +295,7 @@ EXPORT_SYMBOL_GPL(sata_link_debounce);
  *     RETURNS:
  *     0 on success, -errno on failure.
  */
-int sata_link_resume(struct ata_link *link, const unsigned long *params,
+int sata_link_resume(struct ata_link *link, const unsigned int *params,
                     unsigned long deadline)
 {
        int tries = ATA_LINK_RESUME_TRIES;
@@ -528,7 +528,7 @@ EXPORT_SYMBOL_GPL(sata_set_spd);
  *     RETURNS:
  *     0 on success, -errno otherwise.
  */
-int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
+int sata_link_hardreset(struct ata_link *link, const unsigned int *timing,
                        unsigned long deadline,
                        bool *online, int (*check_ready)(struct ata_link *))
 {
@@ -1139,92 +1139,12 @@ struct ata_port *ata_sas_port_alloc(struct ata_host *host,
        ap->flags |= port_info->flags;
        ap->ops = port_info->port_ops;
        ap->cbl = ATA_CBL_SATA;
+       ap->print_id = atomic_inc_return(&ata_print_id);
 
        return ap;
 }
 EXPORT_SYMBOL_GPL(ata_sas_port_alloc);
 
-/**
- *     ata_sas_port_start - Set port up for dma.
- *     @ap: Port to initialize
- *
- *     Called just after data structures for each port are
- *     initialized.
- *
- *     May be used as the port_start() entry in ata_port_operations.
- *
- *     LOCKING:
- *     Inherited from caller.
- */
-int ata_sas_port_start(struct ata_port *ap)
-{
-       /*
-        * the port is marked as frozen at allocation time, but if we don't
-        * have new eh, we won't thaw it
-        */
-       if (!ap->ops->error_handler)
-               ap->pflags &= ~ATA_PFLAG_FROZEN;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_start);
-
-/**
- *     ata_sas_port_stop - Undo ata_sas_port_start()
- *     @ap: Port to shut down
- *
- *     May be used as the port_stop() entry in ata_port_operations.
- *
- *     LOCKING:
- *     Inherited from caller.
- */
-
-void ata_sas_port_stop(struct ata_port *ap)
-{
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_stop);
-
-/**
- * ata_sas_async_probe - simply schedule probing and return
- * @ap: Port to probe
- *
- * For batch scheduling of probe for sas attached ata devices, assumes
- * the port has already been through ata_sas_port_init()
- */
-void ata_sas_async_probe(struct ata_port *ap)
-{
-       __ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_async_probe);
-
-int ata_sas_sync_probe(struct ata_port *ap)
-{
-       return ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_sync_probe);
-
-
-/**
- *     ata_sas_port_init - Initialize a SATA device
- *     @ap: SATA port to initialize
- *
- *     LOCKING:
- *     PCI/etc. bus probe sem.
- *
- *     RETURNS:
- *     Zero on success, non-zero on error.
- */
-
-int ata_sas_port_init(struct ata_port *ap)
-{
-       int rc = ap->ops->port_start(ap);
-
-       if (rc)
-               return rc;
-       ap->print_id = atomic_inc_return(&ata_print_id);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_init);
-
 int ata_sas_tport_add(struct device *parent, struct ata_port *ap)
 {
        return ata_tport_add(parent, ap);
@@ -1238,20 +1158,6 @@ void ata_sas_tport_delete(struct ata_port *ap)
 EXPORT_SYMBOL_GPL(ata_sas_tport_delete);
 
 /**
- *     ata_sas_port_destroy - Destroy a SATA port allocated by ata_sas_port_alloc
- *     @ap: SATA port to destroy
- *
- */
-
-void ata_sas_port_destroy(struct ata_port *ap)
-{
-       if (ap->ops->port_stop)
-               ap->ops->port_stop(ap);
-       kfree(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_destroy);
-
-/**
  *     ata_sas_slave_configure - Default slave_config routine for libata devices
  *     @sdev: SCSI device to configure
  *     @ap: ATA port to which SCSI device is attached
index c6ece32..e4e4175 100644 (file)
@@ -135,11 +135,11 @@ static ssize_t ata_scsi_park_store(struct device *device,
        struct scsi_device *sdev = to_scsi_device(device);
        struct ata_port *ap;
        struct ata_device *dev;
-       long int input;
+       int input;
        unsigned long flags;
        int rc;
 
-       rc = kstrtol(buf, 10, &input);
+       rc = kstrtoint(buf, 10, &input);
        if (rc)
                return rc;
        if (input < -2)
@@ -710,47 +710,6 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
 }
 
 /**
- *     ata_dump_status - user friendly display of error info
- *     @ap: the port in question
- *     @tf: ptr to filled out taskfile
- *
- *     Decode and dump the ATA error/status registers for the user so
- *     that they have some idea what really happened at the non
- *     make-believe layer.
- *
- *     LOCKING:
- *     inherited from caller
- */
-static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf)
-{
-       u8 stat = tf->status, err = tf->error;
-
-       if (stat & ATA_BUSY) {
-               ata_port_warn(ap, "status=0x%02x {Busy} ", stat);
-       } else {
-               ata_port_warn(ap, "status=0x%02x { %s%s%s%s%s%s%s} ", stat,
-                             stat & ATA_DRDY ? "DriveReady " : "",
-                             stat & ATA_DF ? "DeviceFault " : "",
-                             stat & ATA_DSC ? "SeekComplete " : "",
-                             stat & ATA_DRQ ? "DataRequest " : "",
-                             stat & ATA_CORR ? "CorrectedError " : "",
-                             stat & ATA_SENSE ? "Sense " : "",
-                             stat & ATA_ERR ? "Error " : "");
-               if (err)
-                       ata_port_warn(ap, "error=0x%02x {%s%s%s%s%s%s", err,
-                                     err & ATA_ABORTED ?
-                                     "DriveStatusError " : "",
-                                     err & ATA_ICRC ?
-                                     (err & ATA_ABORTED ?
-                                      "BadCRC " : "Sector ") : "",
-                                     err & ATA_UNC ? "UncorrectableError " : "",
-                                     err & ATA_IDNF ? "SectorIdNotFound " : "",
-                                     err & ATA_TRK0NF ? "TrackZeroNotFound " : "",
-                                     err & ATA_AMNF ? "AddrMarkNotFound " : "");
-       }
-}
-
-/**
  *     ata_to_sense_error - convert ATA error to SCSI error
  *     @id: ATA device number
  *     @drv_stat: value contained in ATA status register
@@ -758,7 +717,6 @@ static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf)
  *     @sk: the sense key we'll fill out
  *     @asc: the additional sense code we'll fill out
  *     @ascq: the additional sense code qualifier we'll fill out
- *     @verbose: be verbose
  *
  *     Converts an ATA error into a SCSI error.  Fill out pointers to
  *     SK, ASC, and ASCQ bytes for later use in fixed or descriptor
@@ -768,7 +726,7 @@ static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf)
  *     spin_lock_irqsave(host lock)
  */
 static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
-                              u8 *asc, u8 *ascq, int verbose)
+                              u8 *asc, u8 *ascq)
 {
        int i;
 
@@ -847,7 +805,7 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
                                *sk = sense_table[i][1];
                                *asc = sense_table[i][2];
                                *ascq = sense_table[i][3];
-                               goto translate_done;
+                               return;
                        }
                }
        }
@@ -862,7 +820,7 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
                        *sk = stat_table[i][1];
                        *asc = stat_table[i][2];
                        *ascq = stat_table[i][3];
-                       goto translate_done;
+                       return;
                }
        }
 
@@ -873,12 +831,6 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
        *sk = ABORTED_COMMAND;
        *asc = 0x00;
        *ascq = 0x00;
-
- translate_done:
-       if (verbose)
-               pr_err("ata%u: translated ATA stat/err 0x%02x/%02x to SCSI SK/ASC/ASCQ 0x%x/%02x/%02x\n",
-                      id, drv_stat, drv_err, *sk, *asc, *ascq);
-       return;
 }
 
 /*
@@ -904,7 +856,6 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
        struct ata_taskfile *tf = &qc->result_tf;
        unsigned char *sb = cmd->sense_buffer;
        unsigned char *desc = sb + 8;
-       int verbose = qc->ap->ops->error_handler == NULL;
        u8 sense_key, asc, ascq;
 
        memset(sb, 0, SCSI_SENSE_BUFFERSIZE);
@@ -916,7 +867,7 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
        if (qc->err_mask ||
            tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
                ata_to_sense_error(qc->ap->print_id, tf->status, tf->error,
-                                  &sense_key, &asc, &ascq, verbose);
+                                  &sense_key, &asc, &ascq);
                ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
        } else {
                /*
@@ -999,7 +950,6 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc)
        struct scsi_cmnd *cmd = qc->scsicmd;
        struct ata_taskfile *tf = &qc->result_tf;
        unsigned char *sb = cmd->sense_buffer;
-       int verbose = qc->ap->ops->error_handler == NULL;
        u64 block;
        u8 sense_key, asc, ascq;
 
@@ -1017,7 +967,7 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc)
        if (qc->err_mask ||
            tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
                ata_to_sense_error(qc->ap->print_id, tf->status, tf->error,
-                                  &sense_key, &asc, &ascq, verbose);
+                                  &sense_key, &asc, &ascq);
                ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq);
        } else {
                /* Could not decode error */
@@ -1186,9 +1136,6 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev)
        unsigned long flags;
        struct ata_device *dev;
 
-       if (!ap->ops->error_handler)
-               return;
-
        spin_lock_irqsave(ap->lock, flags);
        dev = __ata_scsi_find_dev(ap, sdev);
        if (dev && dev->sdev) {
@@ -1675,7 +1622,6 @@ static void ata_qc_done(struct ata_queued_cmd *qc)
 
 static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
 {
-       struct ata_port *ap = qc->ap;
        struct scsi_cmnd *cmd = qc->scsicmd;
        u8 *cdb = cmd->cmnd;
        int need_sense = (qc->err_mask != 0) &&
@@ -1699,9 +1645,6 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
                /* Keep the SCSI ML and status byte, clear host byte. */
                cmd->result &= 0x0000ffff;
 
-       if (need_sense && !ap->ops->error_handler)
-               ata_dump_status(ap, &qc->result_tf);
-
        ata_qc_done(qc);
 }
 
@@ -2608,71 +2551,6 @@ static unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf)
        return 0;
 }
 
-static void atapi_sense_complete(struct ata_queued_cmd *qc)
-{
-       if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0)) {
-               /* FIXME: not quite right; we don't want the
-                * translation of taskfile registers into
-                * a sense descriptors, since that's only
-                * correct for ATA, not ATAPI
-                */
-               ata_gen_passthru_sense(qc);
-       }
-
-       ata_qc_done(qc);
-}
-
-/* is it pointless to prefer PIO for "safety reasons"? */
-static inline int ata_pio_use_silly(struct ata_port *ap)
-{
-       return (ap->flags & ATA_FLAG_PIO_DMA);
-}
-
-static void atapi_request_sense(struct ata_queued_cmd *qc)
-{
-       struct ata_port *ap = qc->ap;
-       struct scsi_cmnd *cmd = qc->scsicmd;
-
-       memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
-
-#ifdef CONFIG_ATA_SFF
-       if (ap->ops->sff_tf_read)
-               ap->ops->sff_tf_read(ap, &qc->tf);
-#endif
-
-       /* fill these in, for the case where they are -not- overwritten */
-       cmd->sense_buffer[0] = 0x70;
-       cmd->sense_buffer[2] = qc->tf.error >> 4;
-
-       ata_qc_reinit(qc);
-
-       /* setup sg table and init transfer direction */
-       sg_init_one(&qc->sgent, cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
-       ata_sg_init(qc, &qc->sgent, 1);
-       qc->dma_dir = DMA_FROM_DEVICE;
-
-       memset(&qc->cdb, 0, qc->dev->cdb_len);
-       qc->cdb[0] = REQUEST_SENSE;
-       qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
-
-       qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
-       qc->tf.command = ATA_CMD_PACKET;
-
-       if (ata_pio_use_silly(ap)) {
-               qc->tf.protocol = ATAPI_PROT_DMA;
-               qc->tf.feature |= ATAPI_PKT_DMA;
-       } else {
-               qc->tf.protocol = ATAPI_PROT_PIO;
-               qc->tf.lbam = SCSI_SENSE_BUFFERSIZE;
-               qc->tf.lbah = 0;
-       }
-       qc->nbytes = SCSI_SENSE_BUFFERSIZE;
-
-       qc->complete_fn = atapi_sense_complete;
-
-       ata_qc_issue(qc);
-}
-
 /*
  * ATAPI devices typically report zero for their SCSI version, and sometimes
  * deviate from the spec WRT response data format.  If SCSI version is
@@ -2698,9 +2576,8 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
        struct scsi_cmnd *cmd = qc->scsicmd;
        unsigned int err_mask = qc->err_mask;
 
-       /* handle completion from new EH */
-       if (unlikely(qc->ap->ops->error_handler &&
-                    (err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID))) {
+       /* handle completion from EH */
+       if (unlikely(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID)) {
 
                if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
                        /* FIXME: not quite right; we don't want the
@@ -2732,23 +2609,10 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
                return;
        }
 
-       /* successful completion or old EH failure path */
-       if (unlikely(err_mask & AC_ERR_DEV)) {
-               cmd->result = SAM_STAT_CHECK_CONDITION;
-               atapi_request_sense(qc);
-               return;
-       } else if (unlikely(err_mask)) {
-               /* FIXME: not quite right; we don't want the
-                * translation of taskfile registers into
-                * a sense descriptors, since that's only
-                * correct for ATA, not ATAPI
-                */
-               ata_gen_passthru_sense(qc);
-       } else {
-               if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0)
-                       atapi_fixup_inquiry(cmd);
-               cmd->result = SAM_STAT_GOOD;
-       }
+       /* successful completion path */
+       if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0)
+               atapi_fixup_inquiry(cmd);
+       cmd->result = SAM_STAT_GOOD;
 
        ata_qc_done(qc);
 }
@@ -4797,9 +4661,6 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
        unsigned long flags;
        int devno, rc = 0;
 
-       if (!ap->ops->error_handler)
-               return -EOPNOTSUPP;
-
        if (lun != SCAN_WILD_CARD && lun)
                return -EINVAL;
 
index 9d28bad..8fcc622 100644 (file)
@@ -883,31 +883,21 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
 {
        struct ata_port *ap = qc->ap;
 
-       if (ap->ops->error_handler) {
-               if (in_wq) {
-                       /* EH might have kicked in while host lock is
-                        * released.
-                        */
-                       qc = ata_qc_from_tag(ap, qc->tag);
-                       if (qc) {
-                               if (likely(!(qc->err_mask & AC_ERR_HSM))) {
-                                       ata_sff_irq_on(ap);
-                                       ata_qc_complete(qc);
-                               } else
-                                       ata_port_freeze(ap);
-                       }
-               } else {
-                       if (likely(!(qc->err_mask & AC_ERR_HSM)))
+       if (in_wq) {
+               /* EH might have kicked in while host lock is released. */
+               qc = ata_qc_from_tag(ap, qc->tag);
+               if (qc) {
+                       if (likely(!(qc->err_mask & AC_ERR_HSM))) {
+                               ata_sff_irq_on(ap);
                                ata_qc_complete(qc);
-                       else
+                       } else
                                ata_port_freeze(ap);
                }
        } else {
-               if (in_wq) {
-                       ata_sff_irq_on(ap);
-                       ata_qc_complete(qc);
-               } else
+               if (likely(!(qc->err_mask & AC_ERR_HSM)))
                        ata_qc_complete(qc);
+               else
+                       ata_port_freeze(ap);
        }
 }
 
@@ -1971,7 +1961,7 @@ int sata_sff_hardreset(struct ata_link *link, unsigned int *class,
                       unsigned long deadline)
 {
        struct ata_eh_context *ehc = &link->eh_context;
-       const unsigned long *timing = sata_ehc_deb_timing(ehc);
+       const unsigned int *timing = sata_ehc_deb_timing(ehc);
        bool online;
        int rc;
 
index cf99388..6e7d352 100644 (file)
@@ -78,8 +78,6 @@ extern int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg);
 extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg);
 extern struct ata_port *ata_port_alloc(struct ata_host *host);
 extern const char *sata_spd_string(unsigned int spd);
-extern int ata_port_probe(struct ata_port *ap);
-extern void __ata_port_probe(struct ata_port *ap);
 extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
                                      u8 page, void *buf, unsigned int sectors);
 
@@ -124,7 +122,6 @@ extern void ata_scsi_media_change_notify(struct ata_device *dev);
 extern void ata_scsi_hotplug(struct work_struct *work);
 extern void ata_schedule_scsi_eh(struct Scsi_Host *shost);
 extern void ata_scsi_dev_rescan(struct work_struct *work);
-extern int ata_bus_probe(struct ata_port *ap);
 extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
                              unsigned int id, u64 lun);
 void ata_scsi_sdev_config(struct scsi_device *sdev);
index 314eaa1..d0c6924 100644 (file)
@@ -917,15 +917,13 @@ static int arasan_cf_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int arasan_cf_remove(struct platform_device *pdev)
+static void arasan_cf_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct arasan_cf_dev *acdev = host->ports[0]->private_data;
 
        ata_host_detach(host);
        cf_exit(acdev);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -966,7 +964,7 @@ MODULE_DEVICE_TABLE(of, arasan_cf_id_table);
 
 static struct platform_driver arasan_cf_driver = {
        .probe          = arasan_cf_probe,
-       .remove         = arasan_cf_remove,
+       .remove_new     = arasan_cf_remove,
        .driver         = {
                .name   = DRIVER_NAME,
                .pm     = &arasan_cf_pm_ops,
index 49bc619..c36ee99 100644 (file)
@@ -27,7 +27,6 @@
 
 #include <asm/amigahw.h>
 #include <asm/amigaints.h>
-#include <asm/ide.h>
 #include <asm/setup.h>
 
 #define DRV_NAME "pata_buddha"
index c6e043e..c84a208 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/ata.h>
 #include <linux/libata.h>
 #include <linux/platform_device.h>
+#include <linux/sys_soc.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/ktime.h>
@@ -910,6 +911,12 @@ static struct ata_port_operations ep93xx_pata_port_ops = {
        .port_start             = ep93xx_pata_port_start,
 };
 
+static const struct soc_device_attribute ep93xx_soc_table[] = {
+       { .revision = "E1", .data = (void *)ATA_UDMA3 },
+       { .revision = "E2", .data = (void *)ATA_UDMA4 },
+       { /* sentinel */ }
+};
+
 static int ep93xx_pata_probe(struct platform_device *pdev)
 {
        struct ep93xx_pata_data *drv_data;
@@ -939,7 +946,7 @@ static int ep93xx_pata_probe(struct platform_device *pdev)
 
        drv_data = devm_kzalloc(&pdev->dev, sizeof(*drv_data), GFP_KERNEL);
        if (!drv_data) {
-               err = -ENXIO;
+               err = -ENOMEM;
                goto err_rel_gpio;
        }
 
@@ -952,7 +959,7 @@ static int ep93xx_pata_probe(struct platform_device *pdev)
        /* allocate host */
        host = ata_host_alloc(&pdev->dev, 1);
        if (!host) {
-               err = -ENXIO;
+               err = -ENOMEM;
                goto err_rel_dma;
        }
 
@@ -976,12 +983,11 @@ static int ep93xx_pata_probe(struct platform_device *pdev)
         * so this driver supports only UDMA modes.
         */
        if (drv_data->dma_rx_channel && drv_data->dma_tx_channel) {
-               int chip_rev = ep93xx_chip_revision();
+               const struct soc_device_attribute *match;
 
-               if (chip_rev == EP93XX_CHIP_REV_E1)
-                       ap->udma_mask = ATA_UDMA3;
-               else if (chip_rev == EP93XX_CHIP_REV_E2)
-                       ap->udma_mask = ATA_UDMA4;
+               match = soc_device_match(ep93xx_soc_table);
+               if (match)
+                       ap->udma_mask = (unsigned int) match->data;
                else
                        ap->udma_mask = ATA_UDMA2;
        }
@@ -1004,7 +1010,7 @@ err_rel_gpio:
        return err;
 }
 
-static int ep93xx_pata_remove(struct platform_device *pdev)
+static void ep93xx_pata_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct ep93xx_pata_data *drv_data = host->private_data;
@@ -1013,7 +1019,6 @@ static int ep93xx_pata_remove(struct platform_device *pdev)
        ep93xx_pata_release_dma(drv_data);
        ep93xx_pata_clear_regs(drv_data->ide_base);
        ep93xx_ide_release_gpio(pdev);
-       return 0;
 }
 
 static struct platform_driver ep93xx_pata_platform_driver = {
@@ -1021,7 +1026,7 @@ static struct platform_driver ep93xx_pata_platform_driver = {
                .name = DRV_NAME,
        },
        .probe = ep93xx_pata_probe,
-       .remove = ep93xx_pata_remove,
+       .remove_new = ep93xx_pata_remove,
 };
 
 module_platform_driver(ep93xx_pata_platform_driver);
index 996516e..0c2ae43 100644 (file)
 #include <asm/atarihw.h>
 #include <asm/atariints.h>
 #include <asm/atari_stdma.h>
-#include <asm/ide.h>
 
 #define DRV_NAME "pata_falcon"
 #define DRV_VERSION "0.1.0"
 
+static int pata_falcon_swap_mask;
+
+module_param_named(data_swab, pata_falcon_swap_mask, int, 0444);
+MODULE_PARM_DESC(data_swab, "Data byte swap enable/disable bitmap (0x1==drive1, 0x2==drive2, 0x4==drive3, 0x8==drive4, default==0)");
+
 static const struct scsi_host_template pata_falcon_sht = {
        ATA_PIO_SHT(DRV_NAME),
 };
@@ -50,7 +54,7 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc,
 
        if (dev->class == ATA_DEV_ATA && cmd &&
            !blk_rq_is_passthrough(scsi_cmd_to_rq(cmd)))
-               swap = 0;
+               swap = (uintptr_t)ap->private_data & BIT(dev->devno);
 
        /* Transfer multiple of 2 bytes */
        if (rw == READ) {
@@ -123,8 +127,9 @@ static int __init pata_falcon_init_one(struct platform_device *pdev)
        struct resource *base_res, *ctl_res, *irq_res;
        struct ata_host *host;
        struct ata_port *ap;
-       void __iomem *base;
-       int irq = 0;
+       void __iomem *base, *ctl_base;
+       int mask_shift = 0; /* Q40 & Falcon default */
+       int irq = 0, io_offset = 1, reg_shift = 2; /* Falcon defaults */
 
        dev_info(&pdev->dev, "Atari Falcon and Q40/Q60 PATA controller\n");
 
@@ -165,26 +170,38 @@ static int __init pata_falcon_init_one(struct platform_device *pdev)
        ap->pio_mask = ATA_PIO4;
        ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY;
 
-       base = (void __iomem *)base_mem_res->start;
        /* N.B. this assumes data_addr will be used for word-sized I/O only */
-       ap->ioaddr.data_addr            = base + 0 + 0 * 4;
-       ap->ioaddr.error_addr           = base + 1 + 1 * 4;
-       ap->ioaddr.feature_addr         = base + 1 + 1 * 4;
-       ap->ioaddr.nsect_addr           = base + 1 + 2 * 4;
-       ap->ioaddr.lbal_addr            = base + 1 + 3 * 4;
-       ap->ioaddr.lbam_addr            = base + 1 + 4 * 4;
-       ap->ioaddr.lbah_addr            = base + 1 + 5 * 4;
-       ap->ioaddr.device_addr          = base + 1 + 6 * 4;
-       ap->ioaddr.status_addr          = base + 1 + 7 * 4;
-       ap->ioaddr.command_addr         = base + 1 + 7 * 4;
-
-       base = (void __iomem *)ctl_mem_res->start;
-       ap->ioaddr.altstatus_addr       = base + 1;
-       ap->ioaddr.ctl_addr             = base + 1;
-
-       ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx",
-                     (unsigned long)base_mem_res->start,
-                     (unsigned long)ctl_mem_res->start);
+       ap->ioaddr.data_addr = (void __iomem *)base_mem_res->start;
+
+       if (base_res) {         /* only Q40 has IO resources */
+               io_offset = 0x10000;
+               reg_shift = 0;
+               base = (void __iomem *)base_res->start;
+               ctl_base = (void __iomem *)ctl_res->start;
+       } else {
+               base = (void __iomem *)base_mem_res->start;
+               ctl_base = (void __iomem *)ctl_mem_res->start;
+       }
+
+       ap->ioaddr.error_addr   = base + io_offset + (1 << reg_shift);
+       ap->ioaddr.feature_addr = base + io_offset + (1 << reg_shift);
+       ap->ioaddr.nsect_addr   = base + io_offset + (2 << reg_shift);
+       ap->ioaddr.lbal_addr    = base + io_offset + (3 << reg_shift);
+       ap->ioaddr.lbam_addr    = base + io_offset + (4 << reg_shift);
+       ap->ioaddr.lbah_addr    = base + io_offset + (5 << reg_shift);
+       ap->ioaddr.device_addr  = base + io_offset + (6 << reg_shift);
+       ap->ioaddr.status_addr  = base + io_offset + (7 << reg_shift);
+       ap->ioaddr.command_addr = base + io_offset + (7 << reg_shift);
+
+       ap->ioaddr.altstatus_addr       = ctl_base + io_offset;
+       ap->ioaddr.ctl_addr             = ctl_base + io_offset;
+
+       ata_port_desc(ap, "cmd %px ctl %px data %px",
+                     base, ctl_base, ap->ioaddr.data_addr);
+
+       if (pdev->id > 0)
+               mask_shift = 2;
+       ap->private_data = (void *)(uintptr_t)(pata_falcon_swap_mask >> mask_shift);
 
        irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
        if (irq_res && irq_res->start > 0) {
index 6f6734c..4d6ef90 100644 (file)
@@ -14,8 +14,7 @@
 #include <linux/module.h>
 #include <linux/libata.h>
 #include <linux/bitops.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/clk.h>
 #include "sata_gemini.h"
 
@@ -470,11 +469,7 @@ static int pata_ftide010_probe(struct platform_device *pdev)
        if (irq < 0)
                return irq;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
-       ftide->base = devm_ioremap_resource(dev, res);
+       ftide->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(ftide->base))
                return PTR_ERR(ftide->base);
 
@@ -541,15 +536,13 @@ err_dis_clk:
        return ret;
 }
 
-static int pata_ftide010_remove(struct platform_device *pdev)
+static void pata_ftide010_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct ftide010 *ftide = host->private_data;
 
        ata_host_detach(ftide->host);
        clk_disable_unprepare(ftide->pclk);
-
-       return 0;
 }
 
 static const struct of_device_id pata_ftide010_of_match[] = {
@@ -563,10 +556,11 @@ static struct platform_driver pata_ftide010_driver = {
                .of_match_table = pata_ftide010_of_match,
        },
        .probe = pata_ftide010_probe,
-       .remove = pata_ftide010_remove,
+       .remove_new = pata_ftide010_remove,
 };
 module_platform_driver(pata_ftide010_driver);
 
+MODULE_DESCRIPTION("low level driver for Faraday Technology FTIDE010");
 MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" DRV_NAME);
index e5aa07f..3bdbe2b 100644 (file)
@@ -27,7 +27,6 @@
 #include <asm/amigahw.h>
 #include <asm/amigaints.h>
 #include <asm/amigayle.h>
-#include <asm/ide.h>
 #include <asm/setup.h>
 
 #define DRV_NAME "pata_gayle"
index 4013f28..d0aa8fc 100644 (file)
@@ -141,21 +141,15 @@ static int pata_imx_probe(struct platform_device *pdev)
        if (!priv)
                return -ENOMEM;
 
-       priv->clk = devm_clk_get(&pdev->dev, NULL);
+       priv->clk = devm_clk_get_enabled(&pdev->dev, NULL);
        if (IS_ERR(priv->clk)) {
-               dev_err(&pdev->dev, "Failed to get clock\n");
+               dev_err(&pdev->dev, "Failed to get and enable clock\n");
                return PTR_ERR(priv->clk);
        }
 
-       ret = clk_prepare_enable(priv->clk);
-       if (ret)
-               return ret;
-
        host = ata_host_alloc(&pdev->dev, 1);
-       if (!host) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       if (!host)
+               return -ENOMEM;
 
        host->private_data = priv;
        ap = host->ports[0];
@@ -164,12 +158,9 @@ static int pata_imx_probe(struct platform_device *pdev)
        ap->pio_mask = ATA_PIO4;
        ap->flags |= ATA_FLAG_SLAVE_POSS;
 
-       io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->host_regs = devm_ioremap_resource(&pdev->dev, io_res);
-       if (IS_ERR(priv->host_regs)) {
-               ret = PTR_ERR(priv->host_regs);
-               goto err;
-       }
+       priv->host_regs = devm_platform_get_and_ioremap_resource(pdev, 0, &io_res);
+       if (IS_ERR(priv->host_regs))
+               return PTR_ERR(priv->host_regs);
 
        ap->ioaddr.cmd_addr = priv->host_regs + PATA_IMX_DRIVE_DATA;
        ap->ioaddr.ctl_addr = priv->host_regs + PATA_IMX_DRIVE_CONTROL;
@@ -195,16 +186,12 @@ static int pata_imx_probe(struct platform_device *pdev)
                                &pata_imx_sht);
 
        if (ret)
-               goto err;
+               return ret;
 
        return 0;
-err:
-       clk_disable_unprepare(priv->clk);
-
-       return ret;
 }
 
-static int pata_imx_remove(struct platform_device *pdev)
+static void pata_imx_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct pata_imx_priv *priv = host->private_data;
@@ -212,10 +199,6 @@ static int pata_imx_remove(struct platform_device *pdev)
        ata_host_detach(host);
 
        __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN);
-
-       clk_disable_unprepare(priv->clk);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -266,7 +249,7 @@ MODULE_DEVICE_TABLE(of, imx_pata_dt_ids);
 
 static struct platform_driver pata_imx_driver = {
        .probe          = pata_imx_probe,
-       .remove         = pata_imx_remove,
+       .remove_new     = pata_imx_remove,
        .driver = {
                .name           = DRV_NAME,
                .of_match_table = imx_pata_dt_ids,
index b1daa4d..246bb4f 100644 (file)
@@ -242,12 +242,6 @@ static int ixp4xx_pata_probe(struct platform_device *pdev)
        int ret;
        int irq;
 
-       cmd = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       ctl = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-
-       if (!cmd || !ctl)
-               return -EINVAL;
-
        ixpp = devm_kzalloc(dev, sizeof(*ixpp), GFP_KERNEL);
        if (!ixpp)
                return -ENOMEM;
@@ -271,18 +265,18 @@ static int ixp4xx_pata_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       ixpp->cmd = devm_ioremap_resource(dev, cmd);
-       ixpp->ctl = devm_ioremap_resource(dev, ctl);
-       if (IS_ERR(ixpp->cmd) || IS_ERR(ixpp->ctl))
-               return -ENOMEM;
+       ixpp->cmd = devm_platform_get_and_ioremap_resource(pdev, 0, &cmd);
+       if (IS_ERR(ixpp->cmd))
+               return PTR_ERR(ixpp->cmd);
+
+       ixpp->ctl = devm_platform_get_and_ioremap_resource(pdev, 1, &ctl);
+       if (IS_ERR(ixpp->ctl))
+               return PTR_ERR(ixpp->ctl);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq > 0)
-               irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
-       else if (irq < 0)
+       if (irq < 0)
                return irq;
-       else
-               return -EINVAL;
+       irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
 
        /* Just one port to set up */
        ixp4xx_setup_port(ixpp->host->ports[0], ixpp, cmd->start, ctl->start);
index 66c9dea..6c317a4 100644 (file)
 #include <linux/gfp.h>
 #include <linux/delay.h>
 #include <linux/libata.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 
 #include <asm/cacheflush.h>
@@ -800,8 +801,7 @@ static int mpc52xx_ata_probe(struct platform_device *op)
        return rv;
 }
 
-static int
-mpc52xx_ata_remove(struct platform_device *op)
+static void mpc52xx_ata_remove(struct platform_device *op)
 {
        struct ata_host *host = platform_get_drvdata(op);
        struct mpc52xx_ata_priv *priv = host->private_data;
@@ -815,8 +815,6 @@ mpc52xx_ata_remove(struct platform_device *op)
        irq_dispose_mapping(task_irq);
        bcom_ata_release(priv->dmatsk);
        irq_dispose_mapping(priv->ata_irq);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -857,7 +855,7 @@ static const struct of_device_id mpc52xx_ata_of_match[] = {
 
 static struct platform_driver mpc52xx_ata_of_platform_driver = {
        .probe          = mpc52xx_ata_probe,
-       .remove         = mpc52xx_ata_remove,
+       .remove_new     = mpc52xx_ata_remove,
 #ifdef CONFIG_PM_SLEEP
        .suspend        = mpc52xx_ata_suspend,
        .resume         = mpc52xx_ata_resume,
index ea402e0..5275c64 100644 (file)
@@ -295,7 +295,7 @@ static int pxa_ata_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int pxa_ata_remove(struct platform_device *pdev)
+static void pxa_ata_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct pata_pxa_data *data = host->ports[0]->private_data;
@@ -303,13 +303,11 @@ static int pxa_ata_remove(struct platform_device *pdev)
        dma_release_channel(data->dma_chan);
 
        ata_host_detach(host);
-
-       return 0;
 }
 
 static struct platform_driver pxa_ata_driver = {
        .probe          = pxa_ata_probe,
-       .remove         = pxa_ata_remove,
+       .remove_new     = pxa_ata_remove,
        .driver         = {
                .name           = DRV_NAME,
        },
index 3974d29..0fa253a 100644 (file)
@@ -155,18 +155,16 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int rb532_pata_driver_remove(struct platform_device *pdev)
+static void rb532_pata_driver_remove(struct platform_device *pdev)
 {
        struct ata_host *ah = platform_get_drvdata(pdev);
 
        ata_host_detach(ah);
-
-       return 0;
 }
 
 static struct platform_driver rb532_pata_platform_driver = {
        .probe          = rb532_pata_driver_probe,
-       .remove         = rb532_pata_driver_remove,
+       .remove_new     = rb532_pata_driver_remove,
        .driver  = {
                .name   = DRV_NAME,
        },
index 3b62ea4..93882e9 100644 (file)
@@ -180,8 +180,7 @@ static void sl82c105_bmdma_start(struct ata_queued_cmd *qc)
  *     document.
  *
  *     This function is also called to turn off DMA when a timeout occurs
- *     during DMA operation. In both cases we need to reset the engine,
- *     so no actual eng_timeout handler is required.
+ *     during DMA operation. In both cases we need to reset the engine.
  *
  *     We assume bmdma_stop is always called if bmdma_start as called. If
  *     not then we may need to wrap qc_issue.
index fabdd1e..52f5168 100644 (file)
@@ -18,9 +18,8 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/dmaengine.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
 #include <linux/libata.h>
@@ -1211,7 +1210,7 @@ error_out:
        return err;
 }
 
-static int sata_dwc_remove(struct platform_device *ofdev)
+static void sata_dwc_remove(struct platform_device *ofdev)
 {
        struct device *dev = &ofdev->dev;
        struct ata_host *host = dev_get_drvdata(dev);
@@ -1227,7 +1226,6 @@ static int sata_dwc_remove(struct platform_device *ofdev)
 #endif
 
        dev_dbg(dev, "done\n");
-       return 0;
 }
 
 static const struct of_device_id sata_dwc_match[] = {
@@ -1242,7 +1240,7 @@ static struct platform_driver sata_dwc_driver = {
                .of_match_table = sata_dwc_match,
        },
        .probe = sata_dwc_probe,
-       .remove = sata_dwc_remove,
+       .remove_new = sata_dwc_remove,
 };
 
 module_platform_driver(sata_dwc_driver);
index ccd99b9..01aa05f 100644 (file)
@@ -12,6 +12,9 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -19,9 +22,6 @@
 #include <scsi/scsi_cmnd.h>
 #include <linux/libata.h>
 #include <asm/io.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
 
 static unsigned int intr_coalescing_count;
 module_param(intr_coalescing_count, int, S_IRUGO);
@@ -1526,7 +1526,7 @@ error_exit_with_cleanup:
        return retval;
 }
 
-static int sata_fsl_remove(struct platform_device *ofdev)
+static void sata_fsl_remove(struct platform_device *ofdev)
 {
        struct ata_host *host = platform_get_drvdata(ofdev);
        struct sata_fsl_host_priv *host_priv = host->private_data;
@@ -1535,8 +1535,6 @@ static int sata_fsl_remove(struct platform_device *ofdev)
        device_remove_file(&ofdev->dev, &host_priv->rx_watermark);
 
        ata_host_detach(host);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1591,7 +1589,7 @@ static struct platform_driver fsl_sata_driver = {
                .of_match_table = fsl_sata_match,
        },
        .probe          = sata_fsl_probe,
-       .remove         = sata_fsl_remove,
+       .remove_new     = sata_fsl_remove,
 #ifdef CONFIG_PM_SLEEP
        .suspend        = sata_fsl_suspend,
        .resume         = sata_fsl_resume,
index c42cc9b..400b22e 100644 (file)
@@ -12,8 +12,7 @@
 #include <linux/regmap.h>
 #include <linux/delay.h>
 #include <linux/reset.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/pinctrl/consumer.h>
@@ -400,7 +399,7 @@ out_unprep_clk:
        return ret;
 }
 
-static int gemini_sata_remove(struct platform_device *pdev)
+static void gemini_sata_remove(struct platform_device *pdev)
 {
        struct sata_gemini *sg = platform_get_drvdata(pdev);
 
@@ -409,8 +408,6 @@ static int gemini_sata_remove(struct platform_device *pdev)
                clk_unprepare(sg->sata0_pclk);
        }
        sg_singleton = NULL;
-
-       return 0;
 }
 
 static const struct of_device_id gemini_sata_of_match[] = {
@@ -424,10 +421,11 @@ static struct platform_driver gemini_sata_driver = {
                .of_match_table = gemini_sata_of_match,
        },
        .probe = gemini_sata_probe,
-       .remove = gemini_sata_remove,
+       .remove_new = gemini_sata_remove,
 };
 module_platform_driver(gemini_sata_driver);
 
+MODULE_DESCRIPTION("low level driver for Cortina Systems Gemini SATA bridge");
 MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" DRV_NAME);
index d6b324d..63ef7bb 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/io.h>
 #include <linux/spinlock.h>
 #include <linux/device.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/libata.h>
@@ -385,7 +385,7 @@ static int highbank_initialize_phys(struct device *dev, void __iomem *addr)
 static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class,
                                unsigned long deadline)
 {
-       static const unsigned long timing[] = { 5, 100, 500};
+       static const unsigned int timing[] = { 5, 100, 500};
        struct ata_port *ap = link->ap;
        struct ahci_port_priv *pp = ap->private_data;
        struct ahci_host_priv *hpriv = ap->host->private_data;
index 2c8c78e..db9c255 100644 (file)
@@ -619,7 +619,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
        struct ata_port *ap = link->ap;
        void __iomem *port_base = inic_port_base(ap);
        void __iomem *idma_ctl = port_base + PORT_IDMA_CTL;
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        int rc;
 
        /* hammer it into sane state */
index d404e63..d105db5 100644 (file)
@@ -3633,7 +3633,7 @@ static int mv_hardreset(struct ata_link *link, unsigned int *class,
 
        /* Workaround for errata FEr SATA#10 (part 2) */
        do {
-               const unsigned long *timing =
+               const unsigned int *timing =
                                sata_ehc_deb_timing(&link->eh_context);
 
                rc = sata_link_hardreset(link, timing, deadline + extra,
@@ -4210,7 +4210,7 @@ err:
  *      A platform bus SATA device has been unplugged. Perform the needed
  *      cleanup. Also called on module unload for any active devices.
  */
-static int mv_platform_remove(struct platform_device *pdev)
+static void mv_platform_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct mv_host_priv *hpriv = host->private_data;
@@ -4228,7 +4228,6 @@ static int mv_platform_remove(struct platform_device *pdev)
                }
                phy_power_off(hpriv->port_phys[port]);
        }
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -4284,7 +4283,7 @@ MODULE_DEVICE_TABLE(of, mv_sata_dt_ids);
 
 static struct platform_driver mv_platform_driver = {
        .probe          = mv_platform_probe,
-       .remove         = mv_platform_remove,
+       .remove_new     = mv_platform_remove,
        .suspend        = mv_platform_suspend,
        .resume         = mv_platform_resume,
        .driver         = {
index abf5651..0a0cee7 100644 (file)
@@ -1529,7 +1529,7 @@ static int nv_hardreset(struct ata_link *link, unsigned int *class,
                sata_link_hardreset(link, sata_deb_timing_hotplug, deadline,
                                    NULL, NULL);
        else {
-               const unsigned long *timing = sata_ehc_deb_timing(ehc);
+               const unsigned int *timing = sata_ehc_deb_timing(ehc);
                int rc;
 
                if (!(ehc->i.flags & ATA_EHI_QUIET))
index 34790f1..c1469d0 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/module.h>
 #include <linux/ata.h>
 #include <linux/libata.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/err.h>
@@ -861,15 +861,11 @@ static int sata_rcar_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct ata_host *host;
        struct sata_rcar_priv *priv;
-       struct resource *mem;
-       int irq;
-       int ret = 0;
+       int irq, ret;
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;
-       if (!irq)
-               return -EINVAL;
 
        priv = devm_kzalloc(dev, sizeof(struct sata_rcar_priv), GFP_KERNEL);
        if (!priv)
@@ -890,8 +886,7 @@ static int sata_rcar_probe(struct platform_device *pdev)
 
        host->private_data = priv;
 
-       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->base = devm_ioremap_resource(dev, mem);
+       priv->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->base)) {
                ret = PTR_ERR(priv->base);
                goto err_pm_put;
@@ -914,7 +909,7 @@ err_pm_put:
        return ret;
 }
 
-static int sata_rcar_remove(struct platform_device *pdev)
+static void sata_rcar_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct sata_rcar_priv *priv = host->private_data;
@@ -930,8 +925,6 @@ static int sata_rcar_remove(struct platform_device *pdev)
 
        pm_runtime_put(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1016,7 +1009,7 @@ static const struct dev_pm_ops sata_rcar_pm_ops = {
 
 static struct platform_driver sata_rcar_driver = {
        .probe          = sata_rcar_probe,
-       .remove         = sata_rcar_remove,
+       .remove_new     = sata_rcar_remove,
        .driver = {
                .name           = DRV_NAME,
                .of_match_table = sata_rcar_match,
index e72a025..142e70b 100644 (file)
@@ -597,7 +597,7 @@ static int sil24_init_port(struct ata_port *ap)
 static int sil24_exec_polled_cmd(struct ata_port *ap, int pmp,
                                 const struct ata_taskfile *tf,
                                 int is_cmd, u32 ctrl,
-                                unsigned long timeout_msec)
+                                unsigned int timeout_msec)
 {
        void __iomem *port = sil24_port_base(ap);
        struct sil24_port_priv *pp = ap->private_data;
@@ -651,7 +651,7 @@ static int sil24_softreset(struct ata_link *link, unsigned int *class,
 {
        struct ata_port *ap = link->ap;
        int pmp = sata_srst_pmp(link);
-       unsigned long timeout_msec = 0;
+       unsigned int timeout_msec = 0;
        struct ata_taskfile tf;
        const char *reason;
        int rc;
index ccc0160..b51d7a9 100644 (file)
@@ -232,7 +232,6 @@ static const struct scsi_host_template pdc_sata_sht = {
        .dma_boundary           = ATA_DMA_BOUNDARY,
 };
 
-/* TODO: inherit from base port_ops after converting to new EH */
 static struct ata_port_operations pdc_20621_ops = {
        .inherits               = &ata_sff_port_ops,
 
index 2328cc0..3de11f0 100644 (file)
@@ -7199,7 +7199,6 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
 static ssize_t do_rbd_remove(const char *buf, size_t count)
 {
        struct rbd_device *rbd_dev = NULL;
-       struct list_head *tmp;
        int dev_id;
        char opt_buf[6];
        bool force = false;
@@ -7226,8 +7225,7 @@ static ssize_t do_rbd_remove(const char *buf, size_t count)
 
        ret = -ENOENT;
        spin_lock(&rbd_dev_list_lock);
-       list_for_each(tmp, &rbd_dev_list) {
-               rbd_dev = list_entry(tmp, struct rbd_device, node);
+       list_for_each_entry(rbd_dev, &rbd_dev_list, node) {
                if (rbd_dev->dev_id == dev_id) {
                        ret = 0;
                        break;
index 9eb1a18..ea085b1 100644 (file)
@@ -463,28 +463,6 @@ static bool crb_req_canceled(struct tpm_chip *chip, u8 status)
        return (cancel & CRB_CANCEL_INVOKE) == CRB_CANCEL_INVOKE;
 }
 
-static int crb_check_flags(struct tpm_chip *chip)
-{
-       u32 val;
-       int ret;
-
-       ret = crb_request_locality(chip, 0);
-       if (ret)
-               return ret;
-
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val, NULL);
-       if (ret)
-               goto release;
-
-       if (val == 0x414D4400U /* AMD */)
-               chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
-
-release:
-       crb_relinquish_locality(chip, 0);
-
-       return ret;
-}
-
 static const struct tpm_class_ops tpm_crb = {
        .flags = TPM_OPS_AUTO_STARTUP,
        .status = crb_status,
@@ -797,12 +775,13 @@ static int crb_acpi_add(struct acpi_device *device)
                                FW_BUG "TPM2 ACPI table has wrong size %u for start method type %d\n",
                                buf->header.length,
                                ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON);
-                       return -EINVAL;
+                       rc = -EINVAL;
+                       goto out;
                }
                crb_pluton = ACPI_ADD_PTR(struct tpm2_crb_pluton, buf, sizeof(*buf));
                rc = crb_map_pluton(dev, priv, buf, crb_pluton);
                if (rc)
-                       return rc;
+                       goto out;
        }
 
        priv->sm = sm;
@@ -826,9 +805,14 @@ static int crb_acpi_add(struct acpi_device *device)
        if (rc)
                goto out;
 
-       rc = crb_check_flags(chip);
-       if (rc)
-               goto out;
+#ifdef CONFIG_X86
+       /* A quirk for https://www.amd.com/en/support/kb/faq/pa-410 */
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+           priv->sm != ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) {
+               dev_info(dev, "Disabling hwrng\n");
+               chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
+       }
+#endif /* CONFIG_X86 */
 
        rc = tpm_chip_register(chip);
 
index a757f90..60ed890 100644 (file)
@@ -86,6 +86,7 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy);
 static int cpufreq_set_policy(struct cpufreq_policy *policy,
                              struct cpufreq_governor *new_gov,
                              unsigned int new_pol);
+static bool cpufreq_boost_supported(void);
 
 /*
  * Two notifier lists: the "policy" list is involved in the
@@ -455,8 +456,10 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
                            policy->cur,
                            policy->cpuinfo.max_freq);
 
+       spin_lock(&policy->transition_lock);
        policy->transition_ongoing = false;
        policy->transition_task = NULL;
+       spin_unlock(&policy->transition_lock);
 
        wake_up(&policy->transition_wait);
 }
@@ -621,6 +624,40 @@ static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr,
 }
 define_one_global_rw(boost);
 
+static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf)
+{
+       return sysfs_emit(buf, "%d\n", policy->boost_enabled);
+}
+
+static ssize_t store_local_boost(struct cpufreq_policy *policy,
+                                const char *buf, size_t count)
+{
+       int ret, enable;
+
+       ret = kstrtoint(buf, 10, &enable);
+       if (ret || enable < 0 || enable > 1)
+               return -EINVAL;
+
+       if (!cpufreq_driver->boost_enabled)
+               return -EINVAL;
+
+       if (policy->boost_enabled == enable)
+               return count;
+
+       cpus_read_lock();
+       ret = cpufreq_driver->set_boost(policy, enable);
+       cpus_read_unlock();
+
+       if (ret)
+               return ret;
+
+       policy->boost_enabled = enable;
+
+       return count;
+}
+
+static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost);
+
 static struct cpufreq_governor *find_governor(const char *str_governor)
 {
        struct cpufreq_governor *t;
@@ -1055,6 +1092,12 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
                        return ret;
        }
 
+       if (cpufreq_boost_supported()) {
+               ret = sysfs_create_file(&policy->kobj, &local_boost.attr);
+               if (ret)
+                       return ret;
+       }
+
        return 0;
 }
 
@@ -1943,16 +1986,16 @@ void cpufreq_resume(void)
 
        for_each_active_policy(policy) {
                if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) {
-                       pr_err("%s: Failed to resume driver: %p\n", __func__,
-                               policy);
+                       pr_err("%s: Failed to resume driver: %s\n", __func__,
+                               cpufreq_driver->name);
                } else if (has_target()) {
                        down_write(&policy->rwsem);
                        ret = cpufreq_start_governor(policy);
                        up_write(&policy->rwsem);
 
                        if (ret)
-                               pr_err("%s: Failed to start governor for policy: %p\n",
-                                      __func__, policy);
+                               pr_err("%s: Failed to start governor for CPU%u's policy\n",
+                                      __func__, policy->cpu);
                }
        }
 }
@@ -2716,6 +2759,8 @@ int cpufreq_boost_trigger_state(int state)
                ret = cpufreq_driver->set_boost(policy, state);
                if (ret)
                        goto err_reset_state;
+
+               policy->boost_enabled = state;
        }
        cpus_read_unlock();
 
index 85da677..af44ee6 100644 (file)
@@ -439,7 +439,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
 
        ret = gov->init(dbs_data);
        if (ret)
-               goto free_policy_dbs_info;
+               goto free_dbs_data;
 
        /*
         * The sampling interval should not be less than the transition latency
@@ -474,6 +474,8 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
        if (!have_governor_per_policy())
                gov->gdbs_data = NULL;
        gov->exit(dbs_data);
+
+free_dbs_data:
        kfree(dbs_data);
 
 free_policy_dbs_info:
index 84fe37d..6f8b5ea 100644 (file)
@@ -232,8 +232,8 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy,
        status = ioread16(&pcch_hdr->status);
        iowrite16(0, &pcch_hdr->status);
 
-       cpufreq_freq_transition_end(policy, &freqs, status != CMD_COMPLETE);
        spin_unlock(&pcc_lock);
+       cpufreq_freq_transition_end(policy, &freqs, status != CMD_COMPLETE);
 
        if (status != CMD_COMPLETE) {
                pr_debug("target: FAILED for cpu %d, with status: 0x%x\n",
index 0a7264a..324e942 100644 (file)
@@ -575,6 +575,26 @@ static int zynq_gpio_set_wake(struct irq_data *data, unsigned int on)
        return 0;
 }
 
+static int zynq_gpio_irq_reqres(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+       int ret;
+
+       ret = pm_runtime_resume_and_get(chip->parent);
+       if (ret < 0)
+               return ret;
+
+       return gpiochip_reqres_irq(chip, d->hwirq);
+}
+
+static void zynq_gpio_irq_relres(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+       gpiochip_relres_irq(chip, d->hwirq);
+       pm_runtime_put(chip->parent);
+}
+
 /* irq chip descriptor */
 static const struct irq_chip zynq_gpio_level_irqchip = {
        .name           = DRIVER_NAME,
@@ -584,9 +604,10 @@ static const struct irq_chip zynq_gpio_level_irqchip = {
        .irq_unmask     = zynq_gpio_irq_unmask,
        .irq_set_type   = zynq_gpio_set_irq_type,
        .irq_set_wake   = zynq_gpio_set_wake,
+       .irq_request_resources = zynq_gpio_irq_reqres,
+       .irq_release_resources = zynq_gpio_irq_relres,
        .flags          = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED |
                          IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_IMMUTABLE,
-       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 static const struct irq_chip zynq_gpio_edge_irqchip = {
@@ -597,8 +618,9 @@ static const struct irq_chip zynq_gpio_edge_irqchip = {
        .irq_unmask     = zynq_gpio_irq_unmask,
        .irq_set_type   = zynq_gpio_set_irq_type,
        .irq_set_wake   = zynq_gpio_set_wake,
+       .irq_request_resources = zynq_gpio_irq_reqres,
+       .irq_release_resources = zynq_gpio_irq_relres,
        .flags          = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_IMMUTABLE,
-       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio,
index df633e9..cdf6087 100644 (file)
@@ -442,9 +442,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
                        mem_info->local_mem_size_public,
                        mem_info->local_mem_size_private);
 
-       if (amdgpu_sriov_vf(adev))
-               mem_info->mem_clk_max = adev->clock.default_mclk / 100;
-       else if (adev->pm.dpm_enabled) {
+       if (adev->pm.dpm_enabled) {
                if (amdgpu_emu_mode == 1)
                        mem_info->mem_clk_max = 0;
                else
@@ -463,9 +461,7 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
 uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
 {
        /* the sclk is in quantas of 10kHz */
-       if (amdgpu_sriov_vf(adev))
-               return adev->clock.default_sclk / 100;
-       else if (adev->pm.dpm_enabled)
+       if (adev->pm.dpm_enabled)
                return amdgpu_dpm_get_sclk(adev, false) / 100;
        else
                return 100;
index 835980e..fb2681d 100644 (file)
@@ -217,6 +217,7 @@ union umc_info {
        struct atom_umc_info_v3_1 v31;
        struct atom_umc_info_v3_2 v32;
        struct atom_umc_info_v3_3 v33;
+       struct atom_umc_info_v4_0 v40;
 };
 
 union vram_info {
@@ -508,9 +509,8 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
 
        if (amdgpu_atom_parse_data_header(mode_info->atom_context,
                                index, &size, &frev, &crev, &data_offset)) {
+               umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
                if (frev == 3) {
-                       umc_info = (union umc_info *)
-                               (mode_info->atom_context->bios + data_offset);
                        switch (crev) {
                        case 1:
                                umc_config = le32_to_cpu(umc_info->v31.umc_config);
@@ -533,6 +533,20 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
                                /* unsupported crev */
                                return false;
                        }
+               } else if (frev == 4) {
+                       switch (crev) {
+                       case 0:
+                               umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
+                               ecc_default_enabled =
+                                       (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+                               break;
+                       default:
+                               /* unsupported crev */
+                               return false;
+                       }
+               } else {
+                       /* unsupported frev */
+                       return false;
                }
        }
 
index 49dd9aa..efdb1c4 100644 (file)
@@ -127,7 +127,6 @@ static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
 {
        struct drm_gem_object *gobj;
        unsigned long size;
-       int r;
 
        gobj = drm_gem_object_lookup(p->filp, data->handle);
        if (gobj == NULL)
@@ -137,23 +136,14 @@ static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
        drm_gem_object_put(gobj);
 
        size = amdgpu_bo_size(p->uf_bo);
-       if (size != PAGE_SIZE || (data->offset + 8) > size) {
-               r = -EINVAL;
-               goto error_unref;
-       }
+       if (size != PAGE_SIZE || data->offset > (size - 8))
+               return -EINVAL;
 
-       if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm)) {
-               r = -EINVAL;
-               goto error_unref;
-       }
+       if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
+               return -EINVAL;
 
        *offset = data->offset;
-
        return 0;
-
-error_unref:
-       amdgpu_bo_unref(&p->uf_bo);
-       return r;
 }
 
 static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
index e77f048..3f001a5 100644 (file)
@@ -885,13 +885,20 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
  */
 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
 {
+       int ret;
+
        amdgpu_asic_pre_asic_init(adev);
 
        if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
-           adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
-               return amdgpu_atomfirmware_asic_init(adev, true);
-       else
+           adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+               amdgpu_psp_wait_for_bootloader(adev);
+               ret = amdgpu_atomfirmware_asic_init(adev, true);
+               return ret;
+       } else {
                return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+       }
+
+       return 0;
 }
 
 /**
@@ -4694,9 +4701,12 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
        }
 
        if (ret)
-               dev_err(adev->dev, "GPU mode1 reset failed\n");
+               goto mode1_reset_failed;
 
        amdgpu_device_load_pci_state(adev->pdev);
+       ret = amdgpu_psp_wait_for_bootloader(adev);
+       if (ret)
+               goto mode1_reset_failed;
 
        /* wait for asic to come out of reset */
        for (i = 0; i < adev->usec_timeout; i++) {
@@ -4707,7 +4717,17 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
                udelay(1);
        }
 
+       if (i >= adev->usec_timeout) {
+               ret = -ETIMEDOUT;
+               goto mode1_reset_failed;
+       }
+
        amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+       return 0;
+
+mode1_reset_failed:
+       dev_err(adev->dev, "GPU mode1 reset failed\n");
        return ret;
 }
 
@@ -4849,7 +4869,7 @@ static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
        struct drm_device *dev = adev_to_drm(adev);
 
        ktime_get_ts64(&adev->reset_time);
-       dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
+       dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
                      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
 }
 #endif
index 74ffe65..7d5e7ad 100644 (file)
@@ -1390,6 +1390,7 @@ union gc_info {
        struct gc_info_v1_1 v1_1;
        struct gc_info_v1_2 v1_2;
        struct gc_info_v2_0 v2;
+       struct gc_info_v2_1 v2_1;
 };
 
 static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
@@ -1465,6 +1466,15 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
                adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
                        le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
                adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+               if (gc_info->v2.header.version_minor == 1) {
+                       adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
+                       adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
+                       adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
+                       adev->gfx.config.gc_num_cu_per_sqc = le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc);
+                       adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc);
+                       adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc);
+                       adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */
+               }
                break;
        default:
                dev_err(adev->dev,
@@ -1478,6 +1488,7 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
 
 union mall_info {
        struct mall_info_v1_0 v1;
+       struct mall_info_v2_0 v2;
 };
 
 static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
@@ -1518,6 +1529,10 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
                adev->gmc.mall_size = mall_size;
                adev->gmc.m_half_use = half_use;
                break;
+       case 2:
+               mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc);
+               adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc;
+               break;
        default:
                dev_err(adev->dev,
                        "Unhandled MALL info table %d.%d\n",
index d20dd3f..363e6a2 100644 (file)
@@ -38,6 +38,8 @@
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
@@ -532,11 +534,29 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
        return true;
 }
 
+static int amdgpu_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file,
+                         unsigned int flags, unsigned int color,
+                         struct drm_clip_rect *clips, unsigned int num_clips)
+{
+
+       if (file)
+               return -ENOSYS;
+
+       return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips,
+                                        num_clips);
+}
+
 static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
        .destroy = drm_gem_fb_destroy,
        .create_handle = drm_gem_fb_create_handle,
 };
 
+static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = {
+       .destroy = drm_gem_fb_destroy,
+       .create_handle = drm_gem_fb_create_handle,
+       .dirty = amdgpu_dirtyfb
+};
+
 uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
                                          uint64_t bo_flags)
 {
@@ -1139,7 +1159,11 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
        if (ret)
                goto err;
 
-       ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+       if (drm_drv_uses_atomic_modeset(dev))
+               ret = drm_framebuffer_init(dev, &rfb->base,
+                                          &amdgpu_fb_funcs_atomic);
+       else
+               ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
 
        if (ret)
                goto err;
index a4ff515..395c176 100644 (file)
@@ -241,6 +241,9 @@ struct amdgpu_gfx_config {
        uint32_t gc_gl1c_per_sa;
        uint32_t gc_gl1c_size_per_instance;
        uint32_t gc_gl2c_per_gpu;
+       uint32_t gc_tcp_size_per_cu;
+       uint32_t gc_num_cu_per_sqc;
+       uint32_t gc_tcc_size;
 };
 
 struct amdgpu_cu_info {
index 8fdca54..429ef21 100644 (file)
@@ -2078,6 +2078,17 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
 }
 /* SECUREDISPLAY end */
 
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
+{
+       struct psp_context *psp = &adev->psp;
+       int ret = 0;
+
+       if (!amdgpu_sriov_vf(adev) && psp->funcs && psp->funcs->wait_for_bootloader != NULL)
+               ret = psp->funcs->wait_for_bootloader(psp);
+
+       return ret;
+}
+
 static int psp_hw_start(struct psp_context *psp)
 {
        struct amdgpu_device *adev = psp->adev;
index 3384eb9..3e67ed6 100644 (file)
@@ -109,6 +109,7 @@ enum psp_reg_prog_id {
 
 struct psp_funcs {
        int (*init_microcode)(struct psp_context *psp);
+       int (*wait_for_bootloader)(struct psp_context *psp);
        int (*bootloader_load_kdb)(struct psp_context *psp);
        int (*bootloader_load_spl)(struct psp_context *psp);
        int (*bootloader_load_sysdrv)(struct psp_context *psp);
@@ -533,4 +534,6 @@ int psp_spatial_partition(struct psp_context *psp, int mode);
 
 int is_psp_fw_valid(struct psp_bin_desc bin);
 
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+
 #endif
index 7689395..3c4600e 100644 (file)
@@ -764,7 +764,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        union ta_ras_cmd_input *info;
-       int ret = 0;
+       int ret;
 
        if (!con)
                return -EINVAL;
@@ -773,7 +773,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
        if (enable &&
            head->block != AMDGPU_RAS_BLOCK__GFX &&
            !amdgpu_ras_is_feature_allowed(adev, head))
-               goto out;
+               return 0;
 
        /* Only enable gfx ras feature from host side */
        if (head->block == AMDGPU_RAS_BLOCK__GFX &&
@@ -801,16 +801,16 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
                                enable ? "enable":"disable",
                                get_ras_block_str(head),
                                amdgpu_ras_is_poison_mode_supported(adev), ret);
-                       goto out;
+                       return ret;
                }
+
+               kfree(info);
        }
 
        /* setup the obj */
        __amdgpu_ras_feature_enable(adev, head, enable);
-out:
-       if (head->block == AMDGPU_RAS_BLOCK__GFX)
-               kfree(info);
-       return ret;
+
+       return 0;
 }
 
 /* Only used in device probe stage and called only once. */
@@ -2399,6 +2399,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
        if (amdgpu_sriov_vf(adev)) {
                switch (adev->ip_versions[MP0_HWIP][0]) {
                case IP_VERSION(13, 0, 2):
+               case IP_VERSION(13, 0, 6):
                        return true;
                default:
                        return false;
index 4764d21..595d5e5 100644 (file)
@@ -158,9 +158,10 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
        case IP_VERSION(11, 0, 7): /* Sienna cichlid */
        case IP_VERSION(13, 0, 0):
        case IP_VERSION(13, 0, 2): /* Aldebaran */
-       case IP_VERSION(13, 0, 6):
        case IP_VERSION(13, 0, 10):
                return true;
+       case IP_VERSION(13, 0, 6):
+               return (adev->gmc.is_app_apu) ? false : true;
        default:
                return false;
        }
index 57ed4e5..0a26a00 100644 (file)
@@ -203,6 +203,9 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
                if (adev->rev_id == 0) {
                        WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
                                              REDUCE_FIFO_DEPTH_BY_2, 2);
+               } else {
+                       WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2,
+                                               SPARE, 0x1);
                }
        }
 }
@@ -860,11 +863,15 @@ static int gfx_v9_4_3_sw_init(void *handle)
        if (r)
                return r;
 
-       r = amdgpu_gfx_sysfs_init(adev);
+       r = amdgpu_gfx_ras_sw_init(adev);
        if (r)
                return r;
 
-       return amdgpu_gfx_ras_sw_init(adev);
+
+       if (!amdgpu_sriov_vf(adev))
+               r = amdgpu_gfx_sysfs_init(adev);
+
+       return r;
 }
 
 static int gfx_v9_4_3_sw_fini(void *handle)
@@ -885,7 +892,8 @@ static int gfx_v9_4_3_sw_fini(void *handle)
        gfx_v9_4_3_mec_fini(adev);
        amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
        gfx_v9_4_3_free_microcode(adev);
-       amdgpu_gfx_sysfs_fini(adev);
+       if (!amdgpu_sriov_vf(adev))
+               amdgpu_gfx_sysfs_fini(adev);
 
        return 0;
 }
@@ -2219,15 +2227,6 @@ static void gfx_v9_4_3_xcc_update_sram_fgcg(struct amdgpu_device *adev,
                WREG32_SOC15(GC, GET_INST(GC, xcc_id),
                             regRLC_CGTT_MGCG_OVERRIDE, data);
 
-       def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL);
-
-       if (enable)
-               data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
-       else
-               data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
-
-       if (def != data)
-               WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL, data);
 }
 
 static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
@@ -4048,7 +4047,8 @@ static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev,
        uint32_t i;
        uint32_t data;
 
-       data = REG_SET_FIELD(0, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
+       data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG);
+       data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
                             amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0);
 
        if (amdgpu_watchdog_timer.timeout_fatal_disable &&
index 1561291..1de79d6 100644 (file)
@@ -360,8 +360,10 @@ static int jpeg_v4_0_3_hw_fini(void *handle)
 
        cancel_delayed_work_sync(&adev->jpeg.idle_work);
 
-       if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
-               ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+       if (!amdgpu_sriov_vf(adev)) {
+               if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+                       ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+       }
 
        return ret;
 }
index 9ea0723..f85eec0 100644 (file)
@@ -437,6 +437,24 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
                        XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK);
 
        }
+
+       if (!amdgpu_sriov_vf(adev)) {
+               u32 baco_cntl;
+               for_each_inst(i, adev->aid_mask) {
+                       baco_cntl = RREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL);
+                       if (baco_cntl & (BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+                                        BIF_BX0_BACO_CNTL__BACO_EN_MASK)) {
+                               baco_cntl &= ~(
+                                       BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+                                       BIF_BX0_BACO_CNTL__BACO_EN_MASK);
+                               dev_dbg(adev->dev,
+                                       "Unsetting baco dummy mode %x",
+                                       baco_cntl);
+                               WREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL,
+                                            baco_cntl);
+                       }
+               }
+       }
 }
 
 static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev)
index 10b17bd..469eed0 100644 (file)
@@ -133,12 +133,32 @@ static bool psp_v13_0_is_sos_alive(struct psp_context *psp)
        return sol_reg != 0x0;
 }
 
-static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
 {
        struct amdgpu_device *adev = psp->adev;
+       int retry_loop, ret;
 
-       int ret;
-       int retry_loop;
+       for (retry_loop = 0; retry_loop < 70; retry_loop++) {
+               /* Wait for bootloader to signify that is
+                  ready having bit 31 of C2PMSG_33 set to 1 */
+               ret = psp_wait_for(
+                       psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33),
+                       0x80000000, 0xffffffff, false);
+
+               if (ret == 0)
+                       break;
+       }
+
+       if (ret)
+               dev_warn(adev->dev, "Bootloader wait timed out");
+
+       return ret;
+}
+
+static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+{
+       struct amdgpu_device *adev = psp->adev;
+       int retry_loop, ret;
 
        /* Wait for bootloader to signify that it is ready having bit 31 of
         * C2PMSG_35 set to 1. All other bits are expected to be cleared.
@@ -157,6 +177,19 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
        return ret;
 }
 
+static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
+{
+       struct amdgpu_device *adev = psp->adev;
+
+       if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6)) {
+               psp_v13_0_wait_for_vmbx_ready(psp);
+
+               return psp_v13_0_wait_for_bootloader(psp);
+       }
+
+       return 0;
+}
+
 static int psp_v13_0_bootloader_load_component(struct psp_context      *psp,
                                               struct psp_bin_desc      *bin_desc,
                                               enum psp_bootloader_cmd  bl_cmd)
@@ -714,6 +747,7 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
 
 static const struct psp_funcs psp_v13_0_funcs = {
        .init_microcode = psp_v13_0_init_microcode,
+       .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
        .bootloader_load_kdb = psp_v13_0_bootloader_load_kdb,
        .bootloader_load_spl = psp_v13_0_bootloader_load_spl,
        .bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv,
index c45721c..f5be40d 100644 (file)
@@ -559,8 +559,10 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
                 */
                if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
                        return AMD_RESET_METHOD_MODE2;
+               else if (!(adev->flags & AMD_IS_APU))
+                       return AMD_RESET_METHOD_MODE1;
                else
-                       return AMD_RESET_METHOD_NONE;
+                       return AMD_RESET_METHOD_MODE2;
        default:
                break;
        }
index f0731a6..830396b 100644 (file)
@@ -384,7 +384,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
                        default:
                                break;
                        }
-                       kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
+                       kfd_signal_event_interrupt(pasid, sq_int_data, 24);
                } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
                        kfd_set_dbg_ev_from_interrupt(dev, pasid,
                                KFD_DEBUG_DOORBELL_ID(context_id0),
index 2319467..0bbf0ed 100644 (file)
@@ -457,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
                mqd->is_occupied = kfd_is_occupied_cp;
                mqd->mqd_size = sizeof(struct v11_compute_mqd);
                mqd->get_wave_state = get_wave_state;
+               mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
@@ -472,6 +473,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
                mqd->destroy_mqd = destroy_hiq_mqd;
                mqd->is_occupied = kfd_is_occupied_cp;
                mqd->mqd_size = sizeof(struct v11_compute_mqd);
+               mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
@@ -501,6 +503,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
                mqd->destroy_mqd = kfd_destroy_mqd_sdma;
                mqd->is_occupied = kfd_is_occupied_sdma;
                mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+               mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
index 0115616..bb16b79 100644 (file)
@@ -1686,6 +1686,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                WRITE_ONCE(p->svms.faulting_task, NULL);
                if (r) {
                        pr_debug("failed %d to get svm range pages\n", r);
+                       if (r == -EBUSY)
+                               r = -EAGAIN;
                        goto unreserve_out;
                }
 
index 268cb99..88ba8b6 100644 (file)
@@ -65,6 +65,7 @@
 #include "amdgpu_dm_debugfs.h"
 #endif
 #include "amdgpu_dm_psr.h"
+#include "amdgpu_dm_replay.h"
 
 #include "ivsrcid/ivsrcid_vislands30.h"
 
@@ -4265,6 +4266,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
        enum dc_connection_type new_connection_type = dc_connection_none;
        const struct dc_plane_cap *plane;
        bool psr_feature_enabled = false;
+       bool replay_feature_enabled = false;
        int max_overlay = dm->dc->caps.max_slave_planes;
 
        dm->display_indexes_num = dm->dc->caps.max_streams;
@@ -4374,6 +4376,20 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
                }
        }
 
+       if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
+               switch (adev->ip_versions[DCE_HWIP][0]) {
+               case IP_VERSION(3, 1, 4):
+               case IP_VERSION(3, 1, 5):
+               case IP_VERSION(3, 1, 6):
+               case IP_VERSION(3, 2, 0):
+               case IP_VERSION(3, 2, 1):
+                       replay_feature_enabled = true;
+                       break;
+               default:
+                       replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK;
+                       break;
+               }
+       }
        /* loops over all connectors on the board */
        for (i = 0; i < link_cnt; i++) {
                struct dc_link *link = NULL;
@@ -4422,6 +4438,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
                                amdgpu_dm_update_connector_after_detect(aconnector);
                                setup_backlight_device(dm, aconnector);
 
+                               /*
+                                * Disable psr if replay can be enabled
+                                */
+                               if (replay_feature_enabled && amdgpu_dm_setup_replay(link, aconnector))
+                                       psr_feature_enabled = false;
+
                                if (psr_feature_enabled)
                                        amdgpu_dm_set_psr_caps(link);
 
@@ -6004,7 +6026,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
        if (recalculate_timing)
                drm_mode_set_crtcinfo(&saved_mode, 0);
-       else
+       else if (!old_stream)
                drm_mode_set_crtcinfo(&mode, 0);
 
        /*
index 30d4c6f..97b7a0b 100644 (file)
@@ -29,6 +29,7 @@
 #include "dc.h"
 #include "amdgpu.h"
 #include "amdgpu_dm_psr.h"
+#include "amdgpu_dm_replay.h"
 #include "amdgpu_dm_crtc.h"
 #include "amdgpu_dm_plane.h"
 #include "amdgpu_dm_trace.h"
@@ -123,7 +124,12 @@ static void vblank_control_worker(struct work_struct *work)
         * fill_dc_dirty_rects().
         */
        if (vblank_work->stream && vblank_work->stream->link) {
-               if (vblank_work->enable) {
+               /*
+                * Prioritize replay, instead of psr
+                */
+               if (vblank_work->stream->link->replay_settings.replay_feature_enabled)
+                       amdgpu_dm_replay_enable(vblank_work->stream, false);
+               else if (vblank_work->enable) {
                        if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
                            vblank_work->stream->link->psr_settings.psr_allow_active)
                                amdgpu_dm_psr_disable(vblank_work->stream);
@@ -132,6 +138,7 @@ static void vblank_control_worker(struct work_struct *work)
 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
                           !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) &&
 #endif
+                          vblank_work->stream->link->panel_config.psr.disallow_replay &&
                           vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
                        amdgpu_dm_psr_enable(vblank_work->stream);
                }
index 8eeca16..cc74dd6 100644 (file)
@@ -1269,6 +1269,13 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane,
        attributes.rotation_angle    = 0;
        attributes.attribute_flags.value = 0;
 
+       /* Enable cursor degamma ROM on DCN3+ for implicit sRGB degamma in DRM
+        * legacy gamma setup.
+        */
+       if (crtc_state->cm_is_degamma_srgb &&
+           adev->dm.dc->caps.color.dpp.gamma_corr)
+               attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1;
+
        attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
 
        if (crtc_state->stream) {
@@ -1468,6 +1475,15 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
                drm_plane_create_blend_mode_property(plane, blend_caps);
        }
 
+       if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
+               drm_plane_create_zpos_immutable_property(plane, 0);
+       } else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
+               unsigned int zpos = 1 + drm_plane_index(plane);
+               drm_plane_create_zpos_property(plane, zpos, 1, 254);
+       } else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+               drm_plane_create_zpos_immutable_property(plane, 255);
+       }
+
        if (plane->type == DRM_PLANE_TYPE_PRIMARY &&
            plane_cap &&
            (plane_cap->pixel_format_support.nv12 ||
index 69ffd44..1b8c2ae 100644 (file)
@@ -78,3 +78,4 @@ DC_EDID += dc_edid_parser.o
 AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB))
 AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID))
 AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB) $(AMD_DISPLAY_EDID)
+
index 3e0da87..1042cf1 100644 (file)
@@ -32,6 +32,7 @@
 
 #define MAX_INSTANCE                                        6
 #define MAX_SEGMENT                                         6
+#define SMU_REGISTER_WRITE_RETRY_COUNT                      5
 
 struct IP_BASE_INSTANCE {
     unsigned int segment[MAX_SEGMENT];
@@ -132,6 +133,8 @@ static int dcn315_smu_send_msg_with_param(
                unsigned int msg_id, unsigned int param)
 {
        uint32_t result;
+       uint32_t i = 0;
+       uint32_t read_back_data;
 
        result = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
 
@@ -148,10 +151,19 @@ static int dcn315_smu_send_msg_with_param(
        /* Set the parameter register for the SMU message, unit is Mhz */
        REG_WRITE(MP1_SMN_C2PMSG_37, param);
 
-       /* Trigger the message transaction by writing the message ID */
-       generic_write_indirect_reg(CTX,
-               REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
-               mmMP1_C2PMSG_3, msg_id);
+       for (i = 0; i < SMU_REGISTER_WRITE_RETRY_COUNT; i++) {
+               /* Trigger the message transaction by writing the message ID */
+               generic_write_indirect_reg(CTX,
+                       REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
+                       mmMP1_C2PMSG_3, msg_id);
+               read_back_data = generic_read_indirect_reg(CTX,
+                       REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
+                       mmMP1_C2PMSG_3);
+               if (read_back_data == msg_id)
+                       break;
+               udelay(2);
+               smu_print("SMU msg id write fail %x times. \n", i + 1);
+       }
 
        result = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
 
index 566d704..3a9077b 100644 (file)
@@ -2073,12 +2073,12 @@ enum dc_status dc_commit_streams(struct dc *dc,
                }
        }
 
-       /* Check for case where we are going from odm 2:1 to max
-        *  pipe scenario.  For these cases, we will call
-        *  commit_minimal_transition_state() to exit out of odm 2:1
-        *  first before processing new streams
+       /* ODM Combine 2:1 power optimization is only applied for single stream
+        * scenario, it uses extra pipes than needed to reduce power consumption
+        * We need to switch off this feature to make room for new streams.
         */
-       if (stream_count == dc->res_pool->pipe_count) {
+       if (stream_count > dc->current_state->stream_count &&
+                       dc->current_state->stream_count == 1) {
                for (i = 0; i < dc->res_pool->pipe_count; i++) {
                        pipe = &dc->current_state->res_ctx.pipe_ctx[i];
                        if (pipe->next_odm_pipe)
@@ -3501,6 +3501,45 @@ static void commit_planes_for_stream_fast(struct dc *dc,
                top_pipe_to_program->stream->update_flags.raw = 0;
 }
 
+/*
+ * wait_for_outstanding_hw_updates() - let pending double-buffered updates land
+ * @dc: display core instance whose resource pool (OPPs, MPC) is inspected
+ * @dc_context: state whose pipe contexts are walked
+ *
+ * This function calls HWSS to wait for any potentially double buffered
+ * operations to complete. It should be invoked as a pre-amble prior
+ * to full update programming before asserting any HW locks.
+ *
+ * For every pipe that carries a stream it first waits for the timing
+ * generator's DRR double-buffer pending state to clear (when the hook is
+ * provided). If the pipe also has a HUBP, it then waits for the MPC to go
+ * idle on the first OPP that still has a disconnect pending for that MPCC
+ * and clears the pending flag.
+ */
+static void wait_for_outstanding_hw_updates(struct dc *dc, const struct dc_state *dc_context)
+{
+       int pipe_idx;
+       int opp_inst;
+       int opp_count = dc->res_pool->pipe_count;
+       struct hubp *hubp;
+       int mpcc_inst;
+       const struct pipe_ctx *pipe_ctx;
+
+       for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
+               pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx];
+
+               if (!pipe_ctx->stream)
+                       continue;
+
+               if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear)
+                       pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg);
+
+               hubp = pipe_ctx->plane_res.hubp;
+               if (!hubp)
+                       continue;
+
+               mpcc_inst = hubp->inst;
+               // MPCC inst is equal to pipe index in practice
+               for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
+                       /*
+                        * The scan is bounded by the pipe count rather than
+                        * by the number of OPPs actually populated, so skip
+                        * empty slots instead of dereferencing them.
+                        */
+                       if (!dc->res_pool->opps[opp_inst])
+                               continue;
+
+                       if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
+                               dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
+                               dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
+                               break;
+                       }
+               }
+       }
+}
+
 static void commit_planes_for_stream(struct dc *dc,
                struct dc_surface_update *srf_updates,
                int surface_count,
@@ -3519,24 +3558,9 @@ static void commit_planes_for_stream(struct dc *dc,
        // dc->current_state anymore, so we have to cache it before we apply
        // the new SubVP context
        subvp_prev_use = false;
-
-
        dc_z10_restore(dc);
-
-       if (update_type == UPDATE_TYPE_FULL) {
-               /* wait for all double-buffer activity to clear on all pipes */
-               int pipe_idx;
-
-               for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
-                       struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
-
-                       if (!pipe_ctx->stream)
-                               continue;
-
-                       if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear)
-                               pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg);
-               }
-       }
+       if (update_type == UPDATE_TYPE_FULL)
+               wait_for_outstanding_hw_updates(dc, context);
 
        if (update_type == UPDATE_TYPE_FULL) {
                dc_allow_idle_optimizations(dc, false);
index 65fa9e2..e72f15a 100644 (file)
@@ -1106,29 +1106,6 @@ void dcn20_blank_pixel_data(
                        v_active,
                        offset);
 
-       if (!blank && dc->debug.enable_single_display_2to1_odm_policy) {
-               /* when exiting dynamic ODM need to reinit DPG state for unused pipes */
-               struct pipe_ctx *old_odm_pipe = dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx].next_odm_pipe;
-
-               odm_pipe = pipe_ctx->next_odm_pipe;
-
-               while (old_odm_pipe) {
-                       if (!odm_pipe || old_odm_pipe->pipe_idx != odm_pipe->pipe_idx)
-                               dc->hwss.set_disp_pattern_generator(dc,
-                                               old_odm_pipe,
-                                               CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
-                                               CONTROLLER_DP_COLOR_SPACE_UDEFINED,
-                                               COLOR_DEPTH_888,
-                                               NULL,
-                                               0,
-                                               0,
-                                               0);
-                       old_odm_pipe = old_odm_pipe->next_odm_pipe;
-                       if (odm_pipe)
-                               odm_pipe = odm_pipe->next_odm_pipe;
-               }
-       }
-
        if (!blank)
                if (stream_res->abm) {
                        dc->hwss.set_pipe(pipe_ctx);
@@ -1584,17 +1561,6 @@ static void dcn20_update_dchubp_dpp(
                        || plane_state->update_flags.bits.global_alpha_change
                        || plane_state->update_flags.bits.per_pixel_alpha_change) {
                // MPCC inst is equal to pipe index in practice
-               int mpcc_inst = hubp->inst;
-               int opp_inst;
-               int opp_count = dc->res_pool->pipe_count;
-
-               for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
-                       if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
-                               dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
-                               dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
-                               break;
-                       }
-               }
                hws->funcs.update_mpcc(dc, pipe_ctx);
        }
 
@@ -1722,11 +1688,16 @@ static void dcn20_program_pipe(
                struct dc_state *context)
 {
        struct dce_hwseq *hws = dc->hwseq;
-       /* Only need to unblank on top pipe */
 
-       if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.abm_level)
-                       && !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe)
-               hws->funcs.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible);
+       /* Only need to unblank on top pipe */
+       if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) {
+               if (pipe_ctx->update_flags.bits.enable ||
+                               pipe_ctx->update_flags.bits.odm ||
+                               pipe_ctx->stream->update_flags.bits.abm_level)
+                       hws->funcs.blank_pixel_data(dc, pipe_ctx,
+                                       !pipe_ctx->plane_state ||
+                                       !pipe_ctx->plane_state->visible);
+       }
 
        /* Only update TG on top pipe */
        if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
index 6cef62d..255713e 100644 (file)
@@ -987,3 +987,20 @@ void dcn30_prepare_bandwidth(struct dc *dc,
        }
 }
 
+/*
+ * Build a trigger mask from the requested static-screen triggers and hand it,
+ * together with params->num_frames, to the timing generator of each of the
+ * num_pipes entries in pipe_ctx.
+ *
+ * Mask bits set here: 0x100 for surface_update, 0x8 for cursor_update and
+ * 0x1 for force_trigger.
+ */
+void dcn30_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+               int num_pipes, const struct dc_static_screen_params *params)
+{
+       unsigned int pipe;
+       unsigned int trigger_mask;
+
+       trigger_mask = params->triggers.surface_update ? 0x100 : 0;
+       trigger_mask |= params->triggers.cursor_update ? 0x8 : 0;
+       trigger_mask |= params->triggers.force_trigger ? 0x1 : 0;
+
+       for (pipe = 0; pipe < num_pipes; pipe++) {
+               pipe_ctx[pipe]->stream_res.tg->funcs->set_static_screen_control(
+                               pipe_ctx[pipe]->stream_res.tg,
+                               trigger_mask, params->num_frames);
+       }
+}
index a24a8e3..ce19c54 100644 (file)
@@ -87,5 +87,7 @@ void dcn30_set_hubp_blank(const struct dc *dc,
 void dcn30_prepare_bandwidth(struct dc *dc,
        struct dc_state *context);
 
+void dcn30_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+               int num_pipes, const struct dc_static_screen_params *params);
 
 #endif /* __DC_HWSS_DCN30_H__ */
index 3d19aca..0de8b27 100644 (file)
@@ -64,7 +64,7 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
        .update_bandwidth = dcn20_update_bandwidth,
        .set_drr = dcn10_set_drr,
        .get_position = dcn10_get_position,
-       .set_static_screen_control = dcn10_set_static_screen_control,
+       .set_static_screen_control = dcn30_set_static_screen_control,
        .setup_stereo = dcn10_setup_stereo,
        .set_avmute = dcn30_set_avmute,
        .log_hw_state = dcn10_log_hw_state,
index 257df86..61205cd 100644 (file)
@@ -75,6 +75,7 @@ static const struct hw_sequencer_funcs dcn301_funcs = {
        .get_hw_state = dcn10_get_hw_state,
        .clear_status_bits = dcn10_clear_status_bits,
        .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+       .edp_backlight_control = dce110_edp_backlight_control,
        .edp_power_control = dce110_edp_power_control,
        .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
        .set_cursor_position = dcn10_set_cursor_position,
index fc25cc3..1d7bc1e 100644 (file)
@@ -67,7 +67,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
        .update_bandwidth = dcn20_update_bandwidth,
        .set_drr = dcn10_set_drr,
        .get_position = dcn10_get_position,
-       .set_static_screen_control = dcn10_set_static_screen_control,
+       .set_static_screen_control = dcn30_set_static_screen_control,
        .setup_stereo = dcn10_setup_stereo,
        .set_avmute = dcn30_set_avmute,
        .log_hw_state = dcn10_log_hw_state,
index ca8fe55..4ef85c3 100644 (file)
@@ -69,7 +69,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
        .update_bandwidth = dcn20_update_bandwidth,
        .set_drr = dcn10_set_drr,
        .get_position = dcn10_get_position,
-       .set_static_screen_control = dcn10_set_static_screen_control,
+       .set_static_screen_control = dcn30_set_static_screen_control,
        .setup_stereo = dcn10_setup_stereo,
        .set_avmute = dcn30_set_avmute,
        .log_hw_state = dcn10_log_hw_state,
index 777b2fa..c741714 100644 (file)
@@ -65,7 +65,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
        .update_bandwidth = dcn20_update_bandwidth,
        .set_drr = dcn10_set_drr,
        .get_position = dcn10_get_position,
-       .set_static_screen_control = dcn10_set_static_screen_control,
+       .set_static_screen_control = dcn30_set_static_screen_control,
        .setup_stereo = dcn10_setup_stereo,
        .set_avmute = dcn30_set_avmute,
        .log_hw_state = dcn10_log_hw_state,
index 935cd23..f9d601c 100644 (file)
@@ -2564,18 +2564,128 @@ static int find_optimal_free_pipe_as_secondary_dpp_pipe(
        return free_pipe_idx;
 }
 
+/*
+ * Modified from find_idle_secondary_pipe.
+ *
+ * With windowed MPO and ODM, we want to avoid the case where we want a free
+ * pipe for the left side but that pipe is being used on the right side.
+ *
+ * When primary_pipe is given, the current state is consulted: if the right
+ * side ( next_odm_pipe of the primary pipe in current_state ) already uses a
+ * pipe for MPO ( its bottom_pipe ), that pipe is treated as reserved and is
+ * never handed out here.
+ *
+ * EXCEPTION - 3 plane ( 2 MPO plane ) case: the primary pipe has already
+ * obtained a free pipe for the MPO window on the left, and the request being
+ * served is for the right side, so the pipe must not be skipped. The
+ * reservation therefore only applies while primary_pipe has no bottom pipe
+ * assigned.
+ *
+ * The mirrored index ( pipe_count - 1 - primary index ) is tried first; after
+ * that the pool is searched backwards to keep pipe assignment consistent.
+ * A returned pipe has its pipe_idx set; NULL means no suitable free pipe.
+ */
+static struct pipe_ctx *find_idle_secondary_pipe_check_mpo(
+               struct resource_context *res_ctx,
+               const struct resource_pool *pool,
+               const struct pipe_ctx *primary_pipe)
+{
+       struct pipe_ctx *reserved_pipe = NULL;
+       struct pipe_ctx *candidate;
+       int idx;
+
+       if (primary_pipe) {
+               struct pipe_ctx *cur_primary =
+                       &primary_pipe->stream->ctx->dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx];
+               int preferred_idx = (pool->pipe_count - 1) - primary_pipe->pipe_idx;
+
+               /* stays NULL when the right side has no MPO pipe */
+               if (!primary_pipe->bottom_pipe && cur_primary->next_odm_pipe)
+                       reserved_pipe = cur_primary->next_odm_pipe->bottom_pipe;
+
+               candidate = &res_ctx->pipe_ctx[preferred_idx];
+               if (!candidate->stream &&
+                   !(reserved_pipe && reserved_pipe->pipe_idx == preferred_idx)) {
+                       candidate->pipe_idx = preferred_idx;
+                       return candidate;
+               }
+       }
+
+       /* fall back to the highest-numbered free pipe that is not reserved */
+       for (idx = pool->pipe_count - 1; idx >= 0; idx--) {
+               candidate = &res_ctx->pipe_ctx[idx];
+
+               if (candidate->stream)
+                       continue;
+               if (reserved_pipe && reserved_pipe->pipe_idx == idx)
+                       continue;
+
+               candidate->pipe_idx = idx;
+               return candidate;
+       }
+
+       return NULL;
+}
+
+/*
+ * Acquire an idle pipe in @state to serve as an additional layer under
+ * @head_pipe and wire it to the head pipe's stream resources.
+ *
+ * Modified from dcn20_acquire_idle_pipe_for_layer: if the head pipe already
+ * had a bottom_pipe in the current (old) state and that pipe is still free in
+ * @state, it is reused; otherwise find_idle_secondary_pipe_check_mpo() picks
+ * one.
+ *
+ * The chosen pipe inherits stream, tg and opp from @head_pipe and takes the
+ * hubp/ipp/dpp of its own index from @pool.
+ *
+ * Returns the prepared pipe, or NULL when @head_pipe is NULL or no idle pipe
+ * is available.
+ */
+static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
+               struct dc_state *state,
+               const struct resource_pool *pool,
+               struct dc_stream_state *stream,
+               const struct pipe_ctx *head_pipe)
+{
+       struct resource_context *res_ctx = &state->res_ctx;
+       struct pipe_ctx *idle_pipe, *pipe;
+       struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx;
+       int head_index;
+
+       /*
+        * head_pipe is dereferenced right below; flag the bad call and bail
+        * out instead of falling through to a NULL dereference.
+        */
+       if (!head_pipe) {
+               ASSERT(0);
+               return NULL;
+       }
+
+       /*
+        * Check if head_pipe in old_context already has bottom_pipe allocated.
+        * - If so, check if that pipe is available in the current context.
+        * --  If so, reuse pipe from old_context
+        */
+       head_index = head_pipe->pipe_idx;
+       pipe = &old_ctx->pipe_ctx[head_index];
+       if (pipe->bottom_pipe && res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx].stream == NULL) {
+               idle_pipe = &res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx];
+               idle_pipe->pipe_idx = pipe->bottom_pipe->pipe_idx;
+       } else {
+               idle_pipe = find_idle_secondary_pipe_check_mpo(res_ctx, pool, head_pipe);
+               if (!idle_pipe)
+                       return NULL;
+       }
+
+       /* share the head pipe's stream-level resources */
+       idle_pipe->stream = head_pipe->stream;
+       idle_pipe->stream_res.tg = head_pipe->stream_res.tg;
+       idle_pipe->stream_res.opp = head_pipe->stream_res.opp;
+
+       /* plane-level resources come from the pool slot matching the pipe index */
+       idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx];
+       idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx];
+       idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx];
+       idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst;
+
+       return idle_pipe;
+}
+
 struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe(
                const struct dc_state *cur_ctx,
                struct dc_state *new_ctx,
                const struct resource_pool *pool,
                const struct pipe_ctx *opp_head_pipe)
 {
-       int free_pipe_idx =
-                       find_optimal_free_pipe_as_secondary_dpp_pipe(
-                                       &cur_ctx->res_ctx, &new_ctx->res_ctx,
-                                       pool, opp_head_pipe);
+
+       int free_pipe_idx;
        struct pipe_ctx *free_pipe;
 
+       if (!opp_head_pipe->stream->ctx->dc->config.enable_windowed_mpo_odm)
+               return dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
+                               new_ctx, pool, opp_head_pipe->stream, opp_head_pipe);
+
+       free_pipe_idx = find_optimal_free_pipe_as_secondary_dpp_pipe(
+                                       &cur_ctx->res_ctx, &new_ctx->res_ctx,
+                                       pool, opp_head_pipe);
        if (free_pipe_idx >= 0) {
                free_pipe = &new_ctx->res_ctx.pipe_ctx[free_pipe_idx];
                free_pipe->pipe_idx = free_pipe_idx;
index 8afda5e..5805fb0 100644 (file)
@@ -1099,6 +1099,11 @@ void dcn20_calculate_dlg_params(struct dc *dc,
                context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
                                                pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
                context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+               if (dc->ctx->dce_version < DCN_VERSION_3_1 &&
+                   context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
+                       dcn20_adjust_freesync_v_startup(
+                               &context->res_ctx.pipe_ctx[i].stream->timing,
+                               &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
 
                pipe_idx++;
        }
@@ -1927,7 +1932,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co
        int vlevel = 0;
        int pipe_split_from[MAX_PIPES];
        int pipe_cnt = 0;
-       int i = 0;
        display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
        DC_LOGGER_INIT(dc->ctx->logger);
 
@@ -1951,15 +1955,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co
        dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
        dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
 
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (!context->res_ctx.pipe_ctx[i].stream)
-                       continue;
-               if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
-                       dcn20_adjust_freesync_v_startup(
-                               &context->res_ctx.pipe_ctx[i].stream->timing,
-                               &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
-       }
-
        BW_VAL_TRACE_END_WATERMARKS();
 
        goto validate_out;
@@ -2232,7 +2227,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
        int vlevel = 0;
        int pipe_split_from[MAX_PIPES];
        int pipe_cnt = 0;
-       int i = 0;
        display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
        DC_LOGGER_INIT(dc->ctx->logger);
 
@@ -2261,15 +2255,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
        dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
        dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
 
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (!context->res_ctx.pipe_ctx[i].stream)
-                       continue;
-               if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
-                       dcn20_adjust_freesync_v_startup(
-                               &context->res_ctx.pipe_ctx[i].stream->timing,
-                               &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
-       }
-
        BW_VAL_TRACE_END_WATERMARKS();
 
        goto validate_out;
index 07adb61..fb21572 100644 (file)
@@ -293,6 +293,17 @@ static unsigned int micro_sec_to_vert_lines(unsigned int num_us, struct dc_crtc_
        return num_lines;
 }
 
+/*
+ * Vertical back porch in lines: what remains of the vertical blanking
+ * interval (v_total minus the active region including both borders) once the
+ * front porch and the sync width are subtracted.
+ */
+static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
+{
+       unsigned int active_lines = timing->v_border_top +
+                                   timing->v_addressable +
+                                   timing->v_border_bottom;
+       unsigned int blank_lines = timing->v_total - active_lines;
+
+       return blank_lines - timing->v_front_porch - timing->v_sync_width;
+}
+
 int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
                                               display_e2e_pipe_params_st *pipes,
                                               bool fast_validate)
@@ -310,6 +321,7 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c
        for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
                struct dc_crtc_timing *timing;
                unsigned int num_lines = 0;
+               unsigned int v_back_porch = 0;
 
                if (!res_ctx->pipe_ctx[i].stream)
                        continue;
@@ -323,9 +335,16 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c
                else
                        pipes[pipe_cnt].pipe.dest.vtotal = timing->v_total;
 
+               v_back_porch  = get_vertical_back_porch(timing);
+
                pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
                pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, num_lines);
-               pipes[pipe_cnt].pipe.dest.vblank_nom = max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width);
+               // vblank_nom should not be smaller than (VSync (timing->v_sync_width + v_back_porch) + 2)
+               // + 2 is because
+               // 1 -> VStartup_start should be 1 line before VSync
+               // 1 -> always reserve 1 line between start of vblank to vstartup signal
+               pipes[pipe_cnt].pipe.dest.vblank_nom =
+                       max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width + v_back_porch + 2);
                pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom);
 
                if (pipe->plane_state &&
index dbd6081..ef3a674 100644 (file)
@@ -338,7 +338,9 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
                 *  - Delta for CEIL: delta_from_mid_point_in_us_1
                 *  - Delta for FLOOR: delta_from_mid_point_in_us_2
                 */
-               if ((last_render_time_in_us / mid_point_frames_ceil) < in_out_vrr->min_duration_in_us) {
+               if (mid_point_frames_ceil &&
+                   (last_render_time_in_us / mid_point_frames_ceil) <
+                   in_out_vrr->min_duration_in_us) {
                        /* Check for out of range.
                         * If using CEIL produces a value that is out of range,
                         * then we are forced to use FLOOR.
@@ -385,8 +387,9 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
                /* Either we've calculated the number of frames to insert,
                 * or we need to insert min duration frames
                 */
-               if (last_render_time_in_us / frames_to_insert <
-                               in_out_vrr->min_duration_in_us){
+               if (frames_to_insert &&
+                   (last_render_time_in_us / frames_to_insert) <
+                   in_out_vrr->min_duration_in_us){
                        frames_to_insert -= (frames_to_insert > 1) ?
                                        1 : 0;
                }
index abe829b..67d7b7e 100644 (file)
@@ -240,6 +240,7 @@ enum DC_FEATURE_MASK {
        DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default
        DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default
        DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default
+       DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4
 };
 
 enum DC_DEBUG_MASK {
@@ -250,6 +251,7 @@ enum DC_DEBUG_MASK {
        DC_DISABLE_PSR = 0x10,
        DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
        DC_DISABLE_MPO = 0x40,
+       DC_DISABLE_REPLAY = 0x50,
        DC_ENABLE_DPIA_TRACE = 0x80,
 };
 
index e68c1e2..fa7d6ce 100644 (file)
@@ -3117,6 +3117,24 @@ enum atom_umc_config1_def {
        UMC_CONFIG1__ENABLE_ECC_CAPABLE = 0x00010000,
 };
 
+struct atom_umc_info_v4_0 {
+       struct atom_common_table_header table_header;
+       uint32_t ucode_reserved[5];
+       uint8_t umcip_min_ver;
+       uint8_t umcip_max_ver;
+       uint8_t vram_type;
+       uint8_t umc_config;
+       uint32_t mem_refclk_10khz;
+       uint32_t clk_reserved[4];
+       uint32_t golden_reserved;
+       uint32_t umc_config1;
+       uint32_t reserved[2];
+       uint8_t channel_num;
+       uint8_t channel_width;
+       uint8_t channel_reserve[2];
+       uint8_t umc_info_reserved[16];
+};
+
 /* 
   ***************************************************************************
     Data Table vram_info  structure
index f43e297..7a9d473 100644 (file)
@@ -30,7 +30,7 @@
 #define GC_TABLE_ID                     0x4347
 #define HARVEST_TABLE_SIGNATURE         0x56524148
 #define VCN_INFO_TABLE_ID               0x004E4356
-#define MALL_INFO_TABLE_ID              0x4D414C4C
+#define MALL_INFO_TABLE_ID              0x4C4C414D
 
 typedef enum
 {
@@ -280,6 +280,36 @@ struct gc_info_v2_0 {
        uint32_t gc_num_packer_per_sc;
 };
 
+struct gc_info_v2_1 {
+       struct gpu_info_header header;
+
+       uint32_t gc_num_se;
+       uint32_t gc_num_cu_per_sh;
+       uint32_t gc_num_sh_per_se;
+       uint32_t gc_num_rb_per_se;
+       uint32_t gc_num_tccs;
+       uint32_t gc_num_gprs;
+       uint32_t gc_num_max_gs_thds;
+       uint32_t gc_gs_table_depth;
+       uint32_t gc_gsprim_buff_depth;
+       uint32_t gc_parameter_cache_depth;
+       uint32_t gc_double_offchip_lds_buffer;
+       uint32_t gc_wave_size;
+       uint32_t gc_max_waves_per_simd;
+       uint32_t gc_max_scratch_slots_per_cu;
+       uint32_t gc_lds_size;
+       uint32_t gc_num_sc_per_se;
+       uint32_t gc_num_packer_per_sc;
+       /* new for v2_1 */
+       uint32_t gc_num_tcp_per_sh;
+       uint32_t gc_tcp_size_per_cu;
+       uint32_t gc_num_sdp_interface;
+       uint32_t gc_num_cu_per_sqc;
+       uint32_t gc_instruction_cache_size_per_sqc;
+       uint32_t gc_scalar_data_cache_size_per_sqc;
+       uint32_t gc_tcc_size;
+};
+
 typedef struct harvest_info_header {
        uint32_t signature; /* Table Signature */
        uint32_t version;   /* Table Version */
@@ -312,6 +342,12 @@ struct mall_info_v1_0 {
        uint32_t reserved[5];
 };
 
+struct mall_info_v2_0 {
+       struct mall_info_header header;
+       uint32_t mall_size_per_umc;
+       uint32_t reserved[8];
+};
+
 #define VCN_INFO_TABLE_MAX_NUM_INSTANCES 4
 
 struct vcn_info_header {
index 5b1d73b..41147da 100644 (file)
@@ -3311,8 +3311,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
            (gc_ver != IP_VERSION(9, 4, 3)) &&
            (attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
             attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
+            attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
             attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
-            attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+            attr == &sensor_dev_attr_temp3_label.dev_attr.attr ||
+            attr == &sensor_dev_attr_temp3_crit.dev_attr.attr))
                return 0;
 
        /* hotspot temperature for gc 9,4,3*/
@@ -3324,9 +3326,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
        /* only SOC15 dGPUs support hotspot and mem temperatures */
        if (((adev->flags & AMD_IS_APU) || gc_ver < IP_VERSION(9, 0, 0) ||
            (gc_ver == IP_VERSION(9, 4, 3))) &&
-           (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
-            attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
-            attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
+            (attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
             attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
             attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
             attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
@@ -3471,6 +3471,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
        size = sizeof(uint32_t);
        if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size))
                seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff);
+       size = sizeof(uint32_t);
+       if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size))
+               seq_printf(m, "\t%u.%u W (current GPU)\n", query >> 8, query & 0xff);
        size = sizeof(value);
        seq_printf(m, "\n");
 
index 95eb8a5..5a52098 100644 (file)
@@ -1031,10 +1031,7 @@ struct pptable_funcs {
                                                   enum smu_feature_mask mask);
 
        /**
-        * @notify_display_change: Enable fast memory clock switching.
-        *
-        * Allows for fine grained memory clock switching but has more stringent
-        * timing requirements.
+        * @notify_display_change: General interface call to let SMU know about DC change
         */
        int (*notify_display_change)(struct smu_context *smu);
 
index 10cff75..e2ee855 100644 (file)
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain           0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt           0x4C
-#define PPSMC_Message_Count                      0x4D
+
+#define PPSMC_MSG_DALNotPresent                  0x4E
+
+#define PPSMC_Message_Count                      0x4F
 
 //Debug Dump Message
 #define DEBUGSMC_MSG_TestMessage                    0x1
index 252aef1..9be4051 100644 (file)
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0x5
+#define SMU_METRICS_TABLE_VERSION 0x7
 
 typedef struct __attribute__((packed, aligned(4))) {
   uint32_t AccumulationCounter;
@@ -198,7 +198,7 @@ typedef struct __attribute__((packed, aligned(4))) {
   uint32_t SocketThmResidencyAcc;
   uint32_t VrThmResidencyAcc;
   uint32_t HbmThmResidencyAcc;
-  uint32_t spare;
+  uint32_t GfxLockXCDMak;
 
   // New Items at end to maintain driver compatibility
   uint32_t GfxclkFrequency[8];
index ae4f44c..70a4a71 100644 (file)
 #define PPSMC_MSG_GetMinGfxDpmFreq                  0x32
 #define PPSMC_MSG_GetMaxGfxDpmFreq                  0x33
 #define PPSMC_MSG_PrepareForDriverUnload            0x34
-#define PPSMC_Message_Count                         0x35
+#define PPSMC_MSG_ReadThrottlerLimit                0x35
+#define PPSMC_MSG_QueryValidMcaCount                0x36
+#define PPSMC_MSG_McaBankDumpDW                     0x37
+#define PPSMC_MSG_GetCTFLimit                       0x38
+#define PPSMC_Message_Count                         0x39
 
 //PPSMC Reset Types for driver msg argument
 #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET        0x1
 #define PPSMC_RESET_TYPE_DRIVER_MODE_2_RESET         0x2
 #define PPSMC_RESET_TYPE_DRIVER_MODE_3_RESET        0x3
 
+//PPSMC Throttling Limit Types for driver msg argument
+#define PPSMC_THROTTLING_LIMIT_TYPE_SOCKET          0x1
+#define PPSMC_THROTTLING_LIMIT_TYPE_HBM             0x2
+
+//CTF/Throttle Limit types
+#define PPSMC_AID_THM_TYPE                          0x1
+#define PPSMC_CCD_THM_TYPE                          0x2
+#define PPSMC_XCD_THM_TYPE                          0x3
+#define PPSMC_HBM_THM_TYPE                          0x4
+
 typedef uint32_t PPSMC_Result;
 typedef uint32_t PPSMC_MSG;
 
index 297b70b..e57265c 100644 (file)
@@ -84,6 +84,7 @@
        __SMU_DUMMY_MAP(SetTjMax),                     \
        __SMU_DUMMY_MAP(SetFanTemperatureTarget),      \
        __SMU_DUMMY_MAP(PrepareMp1ForUnload),          \
+       __SMU_DUMMY_MAP(GetCTFLimit),                  \
        __SMU_DUMMY_MAP(DramLogSetDramAddrHigh),       \
        __SMU_DUMMY_MAP(DramLogSetDramAddrLow),        \
        __SMU_DUMMY_MAP(DramLogSetDramSize),           \
        __SMU_DUMMY_MAP(AllowGpo),      \
        __SMU_DUMMY_MAP(Mode2Reset),    \
        __SMU_DUMMY_MAP(RequestI2cTransaction), \
-       __SMU_DUMMY_MAP(GetMetricsTable),
+       __SMU_DUMMY_MAP(GetMetricsTable), \
+       __SMU_DUMMY_MAP(DALNotPresent),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
index f1282fc..0232adb 100644 (file)
@@ -837,12 +837,8 @@ int smu_v13_0_notify_display_change(struct smu_context *smu)
 {
        int ret = 0;
 
-       if (!smu->pm_enabled)
-               return ret;
-
-       if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) &&
-           smu->adev->gmc.vram_type == AMDGPU_VRAM_TYPE_HBM)
-               ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1, NULL);
+       if (!amdgpu_device_has_dc_support(smu->adev))
+               ret = smu_cmn_send_smc_msg(smu, SMU_MSG_DALNotPresent, NULL);
 
        return ret;
 }
index 8b7403b..3903a47 100644 (file)
@@ -162,6 +162,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
        MSG_MAP(AllowGpo,                       PPSMC_MSG_SetGpoAllow,           0),
        MSG_MAP(AllowIHHostInterrupt,           PPSMC_MSG_AllowIHHostInterrupt,       0),
        MSG_MAP(ReenableAcDcInterrupt,          PPSMC_MSG_ReenableAcDcInterrupt,       0),
+       MSG_MAP(DALNotPresent,          PPSMC_MSG_DALNotPresent,       0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -2687,6 +2688,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
        .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag,
        .gpo_control = smu_v13_0_gpo_control,
        .get_ecc_info = smu_v13_0_0_get_ecc_info,
+       .notify_display_change = smu_v13_0_notify_display_change,
 };
 
 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
index 6ed9cd0..199a673 100644 (file)
@@ -132,6 +132,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
        MSG_MAP(SetSoftMinGfxclk,                    PPSMC_MSG_SetSoftMinGfxClk,                0),
        MSG_MAP(SetSoftMaxGfxClk,                    PPSMC_MSG_SetSoftMaxGfxClk,                0),
        MSG_MAP(PrepareMp1ForUnload,                 PPSMC_MSG_PrepareForDriverUnload,          0),
+       MSG_MAP(GetCTFLimit,                         PPSMC_MSG_GetCTFLimit,                     0),
 };
 
 static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
@@ -2081,6 +2082,55 @@ out:
        return ret;
 }
 
+static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu,
+                                                    struct smu_temperature_range *range)
+{
+       struct amdgpu_device *adev = smu->adev;
+       u32 aid_temp, xcd_temp, mem_temp;
+       uint32_t smu_version;
+       u32 ccd_temp = 0;
+       int ret;
+
+       if (amdgpu_sriov_vf(smu->adev))
+               return 0;
+
+       if (!range)
+               return -EINVAL;
+
+       /* Check SMU version: the GetCTFLimit message is only supported on SMU version 85.69 or higher */
+       smu_cmn_get_smc_version(smu, NULL, &smu_version);
+       if (smu_version < 0x554500)
+               return 0;
+
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+                                             PPSMC_AID_THM_TYPE, &aid_temp);
+       if (ret)
+               goto failed;
+
+       if (adev->flags & AMD_IS_APU) {
+               ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+                                                     PPSMC_CCD_THM_TYPE, &ccd_temp);
+               if (ret)
+                       goto failed;
+       }
+
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+                                             PPSMC_XCD_THM_TYPE, &xcd_temp);
+       if (ret)
+               goto failed;
+
+       range->hotspot_crit_max = max3(aid_temp, xcd_temp, ccd_temp) *
+                                      SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+                                             PPSMC_HBM_THM_TYPE, &mem_temp);
+       if (ret)
+               goto failed;
+
+       range->mem_crit_max = mem_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+failed:
+       return ret;
+}
+
 static int smu_v13_0_6_mode1_reset(struct smu_context *smu)
 {
        struct amdgpu_device *adev = smu->adev;
@@ -2108,8 +2158,7 @@ static int smu_v13_0_6_mode1_reset(struct smu_context *smu)
 
 static bool smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu)
 {
-       /* TODO: Enable this when FW support is added */
-       return false;
+       return true;
 }
 
 static bool smu_v13_0_6_is_mode2_reset_supported(struct smu_context *smu)
@@ -2177,6 +2226,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
        .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
        .set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
        .get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
+       .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range,
        .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
        .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported,
        .mode1_reset = smu_v13_0_6_mode1_reset,
index e99a6fa..a7e6775 100644 (file)
@@ -58,6 +58,7 @@ struct i915_perf_group;
 
 typedef u32 intel_engine_mask_t;
 #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
+#define VIRTUAL_ENGINES BIT(BITS_PER_TYPE(intel_engine_mask_t) - 1)
 
 struct intel_hw_status_page {
        struct list_head timelines;
index a0e3ef1..b5b7f2f 100644 (file)
@@ -5470,6 +5470,9 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
 
        ve->base.flags = I915_ENGINE_IS_VIRTUAL;
 
+       BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
+       ve->base.mask = VIRTUAL_ENGINES;
+
        intel_context_init(&ve->context, &ve->base);
 
        for (n = 0; n < count; n++) {
index 4ec8530..094fca9 100644 (file)
 static bool enable_out_of_sync = false;
 static int preallocated_oos_pages = 8192;
 
-static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
-{
-       struct kvm *kvm = vgpu->vfio_device.kvm;
-       int idx;
-       bool ret;
-
-       if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
-               return false;
-
-       idx = srcu_read_lock(&kvm->srcu);
-       ret = kvm_is_visible_gfn(kvm, gfn);
-       srcu_read_unlock(&kvm->srcu, idx);
-
-       return ret;
-}
-
 /*
  * validate a gm address and related range size,
  * translate it to host gm address
@@ -1161,31 +1145,6 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
        ops->set_pfn(se, s->shadow_page.mfn);
 }
 
-/*
- * Check if can do 2M page
- * @vgpu: target vgpu
- * @entry: target pfn's gtt entry
- *
- * Return 1 if 2MB huge gtt shadowing is possible, 0 if miscondition,
- * negative if found err.
- */
-static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
-       struct intel_gvt_gtt_entry *entry)
-{
-       const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
-       kvm_pfn_t pfn;
-
-       if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
-               return 0;
-
-       if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
-               return -EINVAL;
-       pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
-       if (is_error_noslot_pfn(pfn))
-               return -EINVAL;
-       return PageTransHuge(pfn_to_page(pfn));
-}
-
 static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
        struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
        struct intel_gvt_gtt_entry *se)
@@ -1279,7 +1238,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
 {
        const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
        struct intel_gvt_gtt_entry se = *ge;
-       unsigned long gfn, page_size = PAGE_SIZE;
+       unsigned long gfn;
        dma_addr_t dma_addr;
        int ret;
 
@@ -1291,6 +1250,9 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
        switch (ge->type) {
        case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
                gvt_vdbg_mm("shadow 4K gtt entry\n");
+               ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
+               if (ret)
+                       return -ENXIO;
                break;
        case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
                gvt_vdbg_mm("shadow 64K gtt entry\n");
@@ -1302,25 +1264,20 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
                return split_64KB_gtt_entry(vgpu, spt, index, &se);
        case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
                gvt_vdbg_mm("shadow 2M gtt entry\n");
-               ret = is_2MB_gtt_possible(vgpu, ge);
-               if (ret == 0)
+               if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
+                   intel_gvt_dma_map_guest_page(vgpu, gfn,
+                                                I915_GTT_PAGE_SIZE_2M, &dma_addr))
                        return split_2MB_gtt_entry(vgpu, spt, index, &se);
-               else if (ret < 0)
-                       return ret;
-               page_size = I915_GTT_PAGE_SIZE_2M;
                break;
        case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
                gvt_vgpu_err("GVT doesn't support 1GB entry\n");
                return -EINVAL;
        default:
                GEM_BUG_ON(1);
+               return -EINVAL;
        }
 
-       /* direct shadow */
-       ret = intel_gvt_dma_map_guest_page(vgpu, gfn, page_size, &dma_addr);
-       if (ret)
-               return -ENXIO;
-
+       /* Successfully shadowed a 4K or 2M page (without splitting). */
        pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
        ppgtt_set_shadow_entry(spt, &se, index);
        return 0;
@@ -1329,11 +1286,9 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
 {
        struct intel_vgpu *vgpu = spt->vgpu;
-       struct intel_gvt *gvt = vgpu->gvt;
-       const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
        struct intel_vgpu_ppgtt_spt *s;
        struct intel_gvt_gtt_entry se, ge;
-       unsigned long gfn, i;
+       unsigned long i;
        int ret;
 
        trace_spt_change(spt->vgpu->id, "born", spt,
@@ -1350,13 +1305,6 @@ static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
                        ppgtt_generate_shadow_entry(&se, s, &ge);
                        ppgtt_set_shadow_entry(spt, &se, i);
                } else {
-                       gfn = ops->get_pfn(&ge);
-                       if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
-                               ops->set_pfn(&se, gvt->gtt.scratch_mfn);
-                               ppgtt_set_shadow_entry(spt, &se, i);
-                               continue;
-                       }
-
                        ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
                        if (ret)
                                goto fail;
@@ -1845,6 +1793,9 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
        if (mm->ppgtt_mm.shadowed)
                return 0;
 
+       if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+               return -EINVAL;
+
        mm->ppgtt_mm.shadowed = true;
 
        for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
@@ -2331,14 +2282,6 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
                m.val64 = e.val64;
                m.type = e.type;
 
-               /* one PTE update may be issued in multiple writes and the
-                * first write may not construct a valid gfn
-                */
-               if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
-                       ops->set_pfn(&m, gvt->gtt.scratch_mfn);
-                       goto out;
-               }
-
                ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
                                                   &dma_addr);
                if (ret) {
@@ -2355,7 +2298,6 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
                ops->clear_present(&m);
        }
 
-out:
        ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
 
        ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
@@ -2876,24 +2818,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
 }
 
 /**
- * intel_vgpu_reset_gtt - reset the all GTT related status
- * @vgpu: a vGPU
- *
- * This function is called from vfio core to reset reset all
- * GTT related status, including GGTT, PPGTT, scratch page.
- *
- */
-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
-{
-       /* Shadow pages are only created when there is no page
-        * table tracking data, so remove page tracking data after
-        * removing the shadow pages.
-        */
-       intel_vgpu_destroy_all_ppgtt_mm(vgpu);
-       intel_vgpu_reset_ggtt(vgpu, true);
-}
-
-/**
  * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
  * @gvt: intel gvt device
  *
index a3b0f59..4cb183e 100644 (file)
@@ -224,7 +224,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old);
 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
 
 int intel_gvt_init_gtt(struct intel_gvt *gvt);
-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
 void intel_gvt_clean_gtt(struct intel_gvt *gvt);
 
 struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
index 2d65800..53a0a42 100644 (file)
 #define _GVT_H_
 
 #include <uapi/linux/pci_regs.h>
-#include <linux/kvm_host.h>
 #include <linux/vfio.h>
 #include <linux/mdev.h>
 
+#include <asm/kvm_page_track.h>
+
 #include "i915_drv.h"
 #include "intel_gvt.h"
 
index 9cd9e9d..42ce20e 100644 (file)
@@ -106,12 +106,10 @@ struct gvt_dma {
 #define vfio_dev_to_vgpu(vfio_dev) \
        container_of((vfio_dev), struct intel_vgpu, vfio_device)
 
-static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-               const u8 *val, int len,
-               struct kvm_page_track_notifier_node *node);
-static void kvmgt_page_track_flush_slot(struct kvm *kvm,
-               struct kvm_memory_slot *slot,
-               struct kvm_page_track_notifier_node *node);
+static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
+                                  struct kvm_page_track_notifier_node *node);
+static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
+                                          struct kvm_page_track_notifier_node *node);
 
 static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
 {
@@ -161,8 +159,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 
                if (npage == 0)
                        base_page = cur_page;
-               else if (base_page + npage != cur_page) {
-                       gvt_vgpu_err("The pages are not continuous\n");
+               else if (page_to_pfn(base_page) + npage != page_to_pfn(cur_page)) {
                        ret = -EINVAL;
                        npage++;
                        goto err;
@@ -172,7 +169,8 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
        *page = base_page;
        return 0;
 err:
-       gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
+       if (npage)
+               gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
        return ret;
 }
 
@@ -352,6 +350,8 @@ __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn)
 {
        struct kvmgt_pgfn *p, *res = NULL;
 
+       lockdep_assert_held(&info->vgpu_lock);
+
        hash_for_each_possible(info->ptable, p, hnode, gfn) {
                if (gfn == p->gfn) {
                        res = p;
@@ -654,21 +654,19 @@ out:
 static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 {
        struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
-
-       if (!vgpu->vfio_device.kvm ||
-           vgpu->vfio_device.kvm->mm != current->mm) {
-               gvt_vgpu_err("KVM is required to use Intel vGPU\n");
-               return -ESRCH;
-       }
+       int ret;
 
        if (__kvmgt_vgpu_exist(vgpu))
                return -EEXIST;
 
        vgpu->track_node.track_write = kvmgt_page_track_write;
-       vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
-       kvm_get_kvm(vgpu->vfio_device.kvm);
-       kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
-                                        &vgpu->track_node);
+       vgpu->track_node.track_remove_region = kvmgt_page_track_remove_region;
+       ret = kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
+                                              &vgpu->track_node);
+       if (ret) {
+               gvt_vgpu_err("KVM is required to use Intel vGPU\n");
+               return ret;
+       }
 
        set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
 
@@ -703,7 +701,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
 
        kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
                                           &vgpu->track_node);
-       kvm_put_kvm(vgpu->vfio_device.kvm);
 
        kvmgt_protect_table_destroy(vgpu);
        gvt_cache_destroy(vgpu);
@@ -1547,95 +1544,70 @@ static struct mdev_driver intel_vgpu_mdev_driver = {
 
 int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 {
-       struct kvm *kvm = info->vfio_device.kvm;
-       struct kvm_memory_slot *slot;
-       int idx;
+       int r;
 
        if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
                return -ESRCH;
 
-       idx = srcu_read_lock(&kvm->srcu);
-       slot = gfn_to_memslot(kvm, gfn);
-       if (!slot) {
-               srcu_read_unlock(&kvm->srcu, idx);
-               return -EINVAL;
-       }
-
-       write_lock(&kvm->mmu_lock);
-
        if (kvmgt_gfn_is_write_protected(info, gfn))
-               goto out;
+               return 0;
 
-       kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
-       kvmgt_protect_table_add(info, gfn);
+       r = kvm_write_track_add_gfn(info->vfio_device.kvm, gfn);
+       if (r)
+               return r;
 
-out:
-       write_unlock(&kvm->mmu_lock);
-       srcu_read_unlock(&kvm->srcu, idx);
+       kvmgt_protect_table_add(info, gfn);
        return 0;
 }
 
 int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
 {
-       struct kvm *kvm = info->vfio_device.kvm;
-       struct kvm_memory_slot *slot;
-       int idx;
+       int r;
 
        if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
                return -ESRCH;
 
-       idx = srcu_read_lock(&kvm->srcu);
-       slot = gfn_to_memslot(kvm, gfn);
-       if (!slot) {
-               srcu_read_unlock(&kvm->srcu, idx);
-               return -EINVAL;
-       }
-
-       write_lock(&kvm->mmu_lock);
-
        if (!kvmgt_gfn_is_write_protected(info, gfn))
-               goto out;
+               return 0;
 
-       kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
-       kvmgt_protect_table_del(info, gfn);
+       r = kvm_write_track_remove_gfn(info->vfio_device.kvm, gfn);
+       if (r)
+               return r;
 
-out:
-       write_unlock(&kvm->mmu_lock);
-       srcu_read_unlock(&kvm->srcu, idx);
+       kvmgt_protect_table_del(info, gfn);
        return 0;
 }
 
-static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-               const u8 *val, int len,
-               struct kvm_page_track_notifier_node *node)
+static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
+                                  struct kvm_page_track_notifier_node *node)
 {
        struct intel_vgpu *info =
                container_of(node, struct intel_vgpu, track_node);
 
-       if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
+       mutex_lock(&info->vgpu_lock);
+
+       if (kvmgt_gfn_is_write_protected(info, gpa >> PAGE_SHIFT))
                intel_vgpu_page_track_handler(info, gpa,
                                                     (void *)val, len);
+
+       mutex_unlock(&info->vgpu_lock);
 }
 
-static void kvmgt_page_track_flush_slot(struct kvm *kvm,
-               struct kvm_memory_slot *slot,
-               struct kvm_page_track_notifier_node *node)
+static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
+                                          struct kvm_page_track_notifier_node *node)
 {
-       int i;
-       gfn_t gfn;
+       unsigned long i;
        struct intel_vgpu *info =
                container_of(node, struct intel_vgpu, track_node);
 
-       write_lock(&kvm->mmu_lock);
-       for (i = 0; i < slot->npages; i++) {
-               gfn = slot->base_gfn + i;
-               if (kvmgt_gfn_is_write_protected(info, gfn)) {
-                       kvm_slot_page_track_remove_page(kvm, slot, gfn,
-                                               KVM_PAGE_TRACK_WRITE);
-                       kvmgt_protect_table_del(info, gfn);
-               }
+       mutex_lock(&info->vgpu_lock);
+
+       for (i = 0; i < nr_pages; i++) {
+               if (kvmgt_gfn_is_write_protected(info, gfn + i))
+                       kvmgt_protect_table_del(info, gfn + i);
        }
-       write_unlock(&kvm->mmu_lock);
+
+       mutex_unlock(&info->vgpu_lock);
 }
 
 void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
index df34e73..60a6543 100644 (file)
@@ -162,13 +162,9 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
        struct intel_vgpu_page_track *page_track;
        int ret = 0;
 
-       mutex_lock(&vgpu->vgpu_lock);
-
        page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT);
-       if (!page_track) {
-               ret = -ENXIO;
-               goto out;
-       }
+       if (!page_track)
+               return -ENXIO;
 
        if (unlikely(vgpu->failsafe)) {
                /* Remove write protection to prevent furture traps. */
@@ -179,7 +175,5 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
                        gvt_err("guest page write error, gpa %llx\n", gpa);
        }
 
-out:
-       mutex_unlock(&vgpu->vgpu_lock);
        return ret;
 }
index 7c7da28..f590810 100644 (file)
@@ -134,9 +134,7 @@ static void i915_fence_release(struct dma_fence *fence)
        i915_sw_fence_fini(&rq->semaphore);
 
        /*
-        * Keep one request on each engine for reserved use under mempressure
-        * do not use with virtual engines as this really is only needed for
-        * kernel contexts.
+        * Keep one request on each engine for reserved use under mempressure.
         *
         * We do not hold a reference to the engine here and so have to be
         * very careful in what rq->engine we poke. The virtual engine is
@@ -166,8 +164,7 @@ static void i915_fence_release(struct dma_fence *fence)
         * know that if the rq->execution_mask is a single bit, rq->engine
         * can be a physical engine with the exact corresponding mask.
         */
-       if (!intel_engine_is_virtual(rq->engine) &&
-           is_power_of_2(rq->execution_mask) &&
+       if (is_power_of_2(rq->execution_mask) &&
            !cmpxchg(&rq->engine->request_pool, NULL, rq))
                return;
 
index a349245..a34917b 100644 (file)
@@ -1122,18 +1122,11 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
        PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
        PUSH_KICK(push);
 
-       ret = nouveau_fence_new(pfence);
+       ret = nouveau_fence_new(pfence, chan);
        if (ret)
                goto fail;
 
-       ret = nouveau_fence_emit(*pfence, chan);
-       if (ret)
-               goto fail_fence_unref;
-
        return 0;
-
-fail_fence_unref:
-       nouveau_fence_unref(pfence);
 fail:
        spin_lock_irqsave(&dev->event_lock, flags);
        list_del(&s->head);
index 19cab37..0f3bd18 100644 (file)
@@ -875,16 +875,10 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
        if (ret)
                goto out_unlock;
 
-       ret = nouveau_fence_new(&fence);
+       ret = nouveau_fence_new(&fence, chan);
        if (ret)
                goto out_unlock;
 
-       ret = nouveau_fence_emit(fence, chan);
-       if (ret) {
-               nouveau_fence_unref(&fence);
-               goto out_unlock;
-       }
-
        /* TODO: figure out a better solution here
         *
         * wait on the fence here explicitly as going through
index 1fd5ccf..bb3d6e5 100644 (file)
@@ -70,11 +70,9 @@ nouveau_channel_idle(struct nouveau_channel *chan)
                struct nouveau_fence *fence = NULL;
                int ret;
 
-               ret = nouveau_fence_new(&fence);
+               ret = nouveau_fence_new(&fence, chan);
                if (!ret) {
-                       ret = nouveau_fence_emit(fence, chan);
-                       if (!ret)
-                               ret = nouveau_fence_wait(fence, false, false);
+                       ret = nouveau_fence_wait(fence, false, false);
                        nouveau_fence_unref(&fence);
                }
 
index 61e8456..12feecf 100644 (file)
@@ -209,8 +209,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
                goto done;
        }
 
-       if (!nouveau_fence_new(&fence))
-               nouveau_fence_emit(fence, dmem->migrate.chan);
+       nouveau_fence_new(&fence, dmem->migrate.chan);
        migrate_vma_pages(&args);
        nouveau_dmem_fence_done(&fence);
        dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -403,8 +402,7 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
                }
        }
 
-       if (!nouveau_fence_new(&fence))
-               nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
+       nouveau_fence_new(&fence, chunk->drm->dmem->migrate.chan);
        migrate_device_pages(src_pfns, dst_pfns, npages);
        nouveau_dmem_fence_done(&fence);
        migrate_device_finalize(src_pfns, dst_pfns, npages);
@@ -677,8 +675,7 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
                addr += PAGE_SIZE;
        }
 
-       if (!nouveau_fence_new(&fence))
-               nouveau_fence_emit(fence, drm->dmem->migrate.chan);
+       nouveau_fence_new(&fence, drm->dmem->migrate.chan);
        migrate_vma_pages(args);
        nouveau_dmem_fence_done(&fence);
        nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
index a90c4cd..19024ce 100644 (file)
@@ -96,7 +96,8 @@ nouveau_exec_job_submit(struct nouveau_job *job)
        unsigned long index;
        int ret;
 
-       ret = nouveau_fence_new(&exec_job->fence);
+       /* Create a new fence, but do not emit yet. */
+       ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
        if (ret)
                return ret;
 
@@ -170,13 +171,17 @@ nouveau_exec_job_run(struct nouveau_job *job)
                nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
        }
 
-       ret = nouveau_fence_emit(fence, chan);
+       ret = nouveau_fence_emit(fence);
        if (ret) {
+               nouveau_fence_unref(&exec_job->fence);
                NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
                WIND_RING(chan);
                return ERR_PTR(ret);
        }
 
+       /* The fence was emitted successfully, set the job's fence pointer to
+        * NULL in order to avoid freeing it up when the job is cleaned up.
+        */
        exec_job->fence = NULL;
 
        return &fence->base;
@@ -189,7 +194,7 @@ nouveau_exec_job_free(struct nouveau_job *job)
 
        nouveau_job_free(job);
 
-       nouveau_fence_unref(&exec_job->fence);
+       kfree(exec_job->fence);
        kfree(exec_job->push.s);
        kfree(exec_job);
 }
index 77c739a..61d9e70 100644 (file)
@@ -205,16 +205,13 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 }
 
 int
-nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
+nouveau_fence_emit(struct nouveau_fence *fence)
 {
+       struct nouveau_channel *chan = fence->channel;
        struct nouveau_fence_chan *fctx = chan->fence;
        struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
        int ret;
 
-       if (unlikely(!chan->fence))
-               return -ENODEV;
-
-       fence->channel  = chan;
        fence->timeout  = jiffies + (15 * HZ);
 
        if (priv->uevent)
@@ -406,18 +403,41 @@ nouveau_fence_unref(struct nouveau_fence **pfence)
 }
 
 int
-nouveau_fence_new(struct nouveau_fence **pfence)
+nouveau_fence_create(struct nouveau_fence **pfence,
+                    struct nouveau_channel *chan)
 {
        struct nouveau_fence *fence;
 
+       if (unlikely(!chan->fence))
+               return -ENODEV;
+
        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
        if (!fence)
                return -ENOMEM;
 
+       fence->channel = chan;
+
        *pfence = fence;
        return 0;
 }
 
+int
+nouveau_fence_new(struct nouveau_fence **pfence,
+                 struct nouveau_channel *chan)
+{
+       int ret = 0;
+
+       ret = nouveau_fence_create(pfence, chan);
+       if (ret)
+               return ret;
+
+       ret = nouveau_fence_emit(*pfence);
+       if (ret)
+               nouveau_fence_unref(pfence);
+
+       return ret;
+}
+
 static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
 {
        return "nouveau";
index 2c72d96..64d33ae 100644 (file)
@@ -17,10 +17,11 @@ struct nouveau_fence {
        unsigned long timeout;
 };
 
-int  nouveau_fence_new(struct nouveau_fence **);
+int  nouveau_fence_create(struct nouveau_fence **, struct nouveau_channel *);
+int  nouveau_fence_new(struct nouveau_fence **, struct nouveau_channel *);
 void nouveau_fence_unref(struct nouveau_fence **);
 
-int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
+int  nouveau_fence_emit(struct nouveau_fence *);
 bool nouveau_fence_done(struct nouveau_fence *);
 int  nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
 int  nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
index c0b10d8..a0d303e 100644 (file)
@@ -914,11 +914,8 @@ revalidate:
                }
        }
 
-       ret = nouveau_fence_new(&fence);
-       if (!ret)
-               ret = nouveau_fence_emit(fence, chan);
+       ret = nouveau_fence_new(&fence, chan);
        if (ret) {
-               nouveau_fence_unref(&fence);
                NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
                WIND_RING(chan);
                goto out;
index 08aeb69..87283e4 100644 (file)
@@ -1308,7 +1308,11 @@ static int i3c_master_get_i3c_addrs(struct i3c_dev_desc *dev)
        if (dev->info.static_addr) {
                status = i3c_bus_get_addr_slot_status(&master->bus,
                                                      dev->info.static_addr);
-               if (status != I3C_ADDR_SLOT_FREE)
+               /* Since static address and assigned dynamic address can be
+                * equal, allow this case to pass.
+                */
+               if (status != I3C_ADDR_SLOT_FREE &&
+                   dev->info.static_addr != dev->boardinfo->init_dyn_addr)
                        return -EBUSY;
 
                i3c_bus_set_addr_slot_status(&master->bus,
index 09ed19d..01a47d3 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
index 01610fa..49551db 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
-#include <linux/of_device.h>
 
 #define DEV_ID                         0x0
 #define DEV_ID_I3C_MASTER              0x5034
index d97c317..6a781f8 100644 (file)
@@ -339,7 +339,7 @@ static int hci_cmd_v1_daa(struct i3c_hci *hci)
                        break;
                }
                if (RESP_STATUS(xfer[0].response) == RESP_ERR_NACK &&
-                   RESP_STATUS(xfer[0].response) == 1) {
+                   RESP_DATA_LENGTH(xfer->response) == 1) {
                        ret = 0;  /* no more devices to be assigned */
                        break;
                }
index 0d63b73..8f8295a 100644 (file)
@@ -156,6 +156,7 @@ struct svc_i3c_regs_save {
  * @base: I3C master controller
  * @dev: Corresponding device
  * @regs: Memory mapping
+ * @saved_regs: Volatile values for PM operations
  * @free_slots: Bit array of available slots
  * @addrs: Array containing the dynamic addresses of each attached device
  * @descs: Array of descriptors, one per attached device
@@ -789,6 +790,10 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master,
                                 */
                                break;
                        } else if (SVC_I3C_MSTATUS_NACKED(reg)) {
+                               /* No I3C devices attached */
+                               if (dev_nb == 0)
+                                       break;
+
                                /*
                                 * A slave device nacked the address, this is
                                 * allowed only once, DAA will be stopped and
@@ -1263,11 +1268,17 @@ static int svc_i3c_master_send_ccc_cmd(struct i3c_master_controller *m,
 {
        struct svc_i3c_master *master = to_svc_i3c_master(m);
        bool broadcast = cmd->id < 0x80;
+       int ret;
 
        if (broadcast)
-               return svc_i3c_master_send_bdcast_ccc_cmd(master, cmd);
+               ret = svc_i3c_master_send_bdcast_ccc_cmd(master, cmd);
        else
-               return svc_i3c_master_send_direct_ccc_cmd(master, cmd);
+               ret = svc_i3c_master_send_direct_ccc_cmd(master, cmd);
+
+       if (ret)
+               cmd->err = I3C_ERROR_M2;
+
+       return ret;
 }
 
 static int svc_i3c_master_priv_xfers(struct i3c_dev_desc *dev,
@@ -1518,8 +1529,8 @@ static int svc_i3c_master_probe(struct platform_device *pdev)
                return PTR_ERR(master->sclk);
 
        master->irq = platform_get_irq(pdev, 0);
-       if (master->irq <= 0)
-               return -ENOENT;
+       if (master->irq < 0)
+               return master->irq;
 
        master->dev = dev;
 
index 5a2c2fb..fe73b26 100644 (file)
@@ -25,6 +25,7 @@ if GAMEPORT
 
 config GAMEPORT_NS558
        tristate "Classic ISA and PnP gameport support"
+       depends on ISA
        help
          Say Y here if you have an ISA or PnP gameport.
 
@@ -35,6 +36,7 @@ config GAMEPORT_NS558
 
 config GAMEPORT_L4
        tristate "PDPI Lightning 4 gamecard support"
+       depends on ISA
        help
          Say Y here if you have a PDPI Lightning 4 gamecard.
 
@@ -53,7 +55,7 @@ config GAMEPORT_EMU10K1
 
 config GAMEPORT_FM801
        tristate "ForteMedia FM801 gameport support"
-       depends on PCI
+       depends on PCI && HAS_IOPORT
        help
          Say Y here if you have ForteMedia FM801 PCI audio controller
          (Abit AU10, Genius Sound Maker, HP Workstation zx2000,
index a144332..34f416a 100644 (file)
@@ -519,12 +519,32 @@ EXPORT_SYMBOL(gameport_set_phys);
 
 static void gameport_default_trigger(struct gameport *gameport)
 {
+#ifdef CONFIG_HAS_IOPORT
        outb(0xff, gameport->io);
+#endif
 }
 
 static unsigned char gameport_default_read(struct gameport *gameport)
 {
+#ifdef CONFIG_HAS_IOPORT
        return inb(gameport->io);
+#else
+       return 0xff;
+#endif
+}
+
+static void gameport_setup_default_handlers(struct gameport *gameport)
+{
+       if ((!gameport->trigger || !gameport->read) &&
+           !IS_ENABLED(CONFIG_HAS_IOPORT))
+               dev_err(&gameport->dev,
+                       "I/O port access is required for %s (%s) but is not available\n",
+                       gameport->phys, gameport->name);
+
+       if (!gameport->trigger)
+               gameport->trigger = gameport_default_trigger;
+       if (!gameport->read)
+               gameport->read = gameport_default_read;
 }
 
 /*
@@ -545,11 +565,7 @@ static void gameport_init_port(struct gameport *gameport)
        if (gameport->parent)
                gameport->dev.parent = &gameport->parent->dev;
 
-       if (!gameport->trigger)
-               gameport->trigger = gameport_default_trigger;
-       if (!gameport->read)
-               gameport->read = gameport_default_read;
-
+       gameport_setup_default_handlers(gameport);
        INIT_LIST_HEAD(&gameport->node);
        spin_lock_init(&gameport->timer_lock);
        timer_setup(&gameport->poll_timer, gameport_run_poll_handler, 0);
index cdb1933..ede3805 100644 (file)
@@ -264,6 +264,7 @@ static const struct xpad_device {
        { 0x0f0d, 0x0067, "HORIPAD ONE", 0, XTYPE_XBOXONE },
        { 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
        { 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
+       { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX },
        { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX },
        { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX },
@@ -365,6 +366,7 @@ static const struct xpad_device {
        { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 },
        { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 },
        { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 },
+       { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 },
        { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX },
        { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX },
        { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN }
@@ -499,6 +501,8 @@ static const struct usb_device_id xpad_table[] = {
        XPAD_XBOX360_VENDOR(0x2f24),            /* GameSir controllers */
        XPAD_XBOX360_VENDOR(0x31e3),            /* Wooting Keyboards */
        XPAD_XBOX360_VENDOR(0x3285),            /* Nacon GC-100 */
+       XPAD_XBOX360_VENDOR(0x3537),            /* GameSir Controllers */
+       XPAD_XBOXONE_VENDOR(0x3537),            /* GameSir Controllers */
        { }
 };
 
@@ -1720,6 +1724,27 @@ static int xpad_start_input(struct usb_xpad *xpad)
                        return error;
                }
        }
+       if (xpad->xtype == XTYPE_XBOX360) {
+               /*
+                * Some third-party controllers Xbox 360-style controllers
+                * require this message to finish initialization.
+                */
+               u8 dummy[20];
+
+               error = usb_control_msg_recv(xpad->udev, 0,
+                                            /* bRequest */ 0x01,
+                                            /* bmRequestType */
+                                            USB_TYPE_VENDOR | USB_DIR_IN |
+                                               USB_RECIP_INTERFACE,
+                                            /* wValue */ 0x100,
+                                            /* wIndex */ 0x00,
+                                            dummy, sizeof(dummy),
+                                            25, GFP_KERNEL);
+               if (error)
+                       dev_warn(&xpad->dev->dev,
+                                "unable to receive magic message: %d\n",
+                                error);
+       }
 
        return 0;
 }
index 896a5a9..61e8e43 100644 (file)
@@ -713,17 +713,11 @@ static int adp5588_fw_parse(struct adp5588_kpad *kpad)
        return 0;
 }
 
-static void adp5588_disable_regulator(void *reg)
-{
-       regulator_disable(reg);
-}
-
 static int adp5588_probe(struct i2c_client *client)
 {
        struct adp5588_kpad *kpad;
        struct input_dev *input;
        struct gpio_desc *gpio;
-       struct regulator *vcc;
        unsigned int revid;
        int ret;
        int error;
@@ -749,16 +743,7 @@ static int adp5588_probe(struct i2c_client *client)
        if (error)
                return error;
 
-       vcc = devm_regulator_get(&client->dev, "vcc");
-       if (IS_ERR(vcc))
-               return PTR_ERR(vcc);
-
-       error = regulator_enable(vcc);
-       if (error)
-               return error;
-
-       error = devm_add_action_or_reset(&client->dev,
-                                        adp5588_disable_regulator, vcc);
+       error = devm_regulator_get_enable(&client->dev, "vcc");
        if (error)
                return error;
 
index a20a4e1..e305c44 100644 (file)
@@ -196,7 +196,7 @@ static int __init amikbd_probe(struct platform_device *pdev)
        struct input_dev *dev;
        int i, err;
 
-       dev = input_allocate_device();
+       dev = devm_input_allocate_device(&pdev->dev);
        if (!dev) {
                dev_err(&pdev->dev, "Not enough memory for input device\n");
                return -ENOMEM;
@@ -208,7 +208,6 @@ static int __init amikbd_probe(struct platform_device *pdev)
        dev->id.vendor = 0x0001;
        dev->id.product = 0x0001;
        dev->id.version = 0x0100;
-       dev->dev.parent = &pdev->dev;
 
        dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 
@@ -218,35 +217,21 @@ static int __init amikbd_probe(struct platform_device *pdev)
        amikbd_init_console_keymaps();
 
        ciaa.cra &= ~0x41;       /* serial data in, turn off TA */
-       err = request_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt, 0, "amikbd",
-                         dev);
+       err = devm_request_irq(&pdev->dev, IRQ_AMIGA_CIAA_SP, amikbd_interrupt,
+                              0, "amikbd", dev);
        if (err)
-               goto fail2;
+               return err;
 
        err = input_register_device(dev);
        if (err)
-               goto fail3;
+               return err;
 
        platform_set_drvdata(pdev, dev);
 
        return 0;
-
- fail3:        free_irq(IRQ_AMIGA_CIAA_SP, dev);
- fail2:        input_free_device(dev);
-       return err;
-}
-
-static int __exit amikbd_remove(struct platform_device *pdev)
-{
-       struct input_dev *dev = platform_get_drvdata(pdev);
-
-       free_irq(IRQ_AMIGA_CIAA_SP, dev);
-       input_unregister_device(dev);
-       return 0;
 }
 
 static struct platform_driver amikbd_driver = {
-       .remove = __exit_p(amikbd_remove),
        .driver   = {
                .name   = "amiga-keyboard",
        },
index 56a919e..f3c3746 100644 (file)
@@ -307,7 +307,6 @@ static int bcm_kp_probe(struct platform_device *pdev)
 {
        struct bcm_kp *kp;
        struct input_dev *input_dev;
-       struct resource *res;
        int error;
 
        kp = devm_kzalloc(&pdev->dev, sizeof(*kp), GFP_KERNEL);
@@ -353,29 +352,16 @@ static int bcm_kp_probe(struct platform_device *pdev)
                return error;
        }
 
-       /* Get the KEYPAD base address */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "Missing keypad base address resource\n");
-               return -ENODEV;
-       }
-
-       kp->base = devm_ioremap_resource(&pdev->dev, res);
+       kp->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(kp->base))
                return PTR_ERR(kp->base);
 
        /* Enable clock */
-       kp->clk = devm_clk_get(&pdev->dev, "peri_clk");
+       kp->clk = devm_clk_get_optional(&pdev->dev, "peri_clk");
        if (IS_ERR(kp->clk)) {
-               error = PTR_ERR(kp->clk);
-               if (error != -ENOENT) {
-                       if (error != -EPROBE_DEFER)
-                               dev_err(&pdev->dev, "Failed to get clock\n");
-                       return error;
-               }
-               dev_dbg(&pdev->dev,
-                       "No clock specified. Assuming it's enabled\n");
-               kp->clk = NULL;
+               return dev_err_probe(&pdev->dev, PTR_ERR(kp->clk), "Failed to get clock\n");
+       } else if (!kp->clk) {
+               dev_dbg(&pdev->dev, "No clock specified. Assuming it's enabled\n");
        } else {
                unsigned int desired_rate;
                long actual_rate;
index c928829..2e7c2c0 100644 (file)
@@ -523,18 +523,15 @@ static int gpio_keys_setup_key(struct platform_device *pdev,
                                                     NULL, GPIOD_IN, desc);
                if (IS_ERR(bdata->gpiod)) {
                        error = PTR_ERR(bdata->gpiod);
-                       if (error == -ENOENT) {
-                               /*
-                                * GPIO is optional, we may be dealing with
-                                * purely interrupt-driven setup.
-                                */
-                               bdata->gpiod = NULL;
-                       } else {
-                               if (error != -EPROBE_DEFER)
-                                       dev_err(dev, "failed to get gpio: %d\n",
-                                               error);
-                               return error;
-                       }
+                       if (error != -ENOENT)
+                               return dev_err_probe(dev, error,
+                                                    "failed to get gpio\n");
+
+                       /*
+                        * GPIO is optional, we may be dealing with
+                        * purely interrupt-driven setup.
+                        */
+                       bdata->gpiod = NULL;
                }
        } else if (gpio_is_valid(button->gpio)) {
                /*
index c3937d2..ba00ecf 100644 (file)
@@ -299,13 +299,9 @@ static int gpio_keys_polled_probe(struct platform_device *pdev)
                                                             NULL, GPIOD_IN,
                                                             button->desc);
                        if (IS_ERR(bdata->gpiod)) {
-                               error = PTR_ERR(bdata->gpiod);
-                               if (error != -EPROBE_DEFER)
-                                       dev_err(dev,
-                                               "failed to get gpio: %d\n",
-                                               error);
                                fwnode_handle_put(child);
-                               return error;
+                               return dev_err_probe(dev, PTR_ERR(bdata->gpiod),
+                                                    "failed to get gpio\n");
                        }
                } else if (gpio_is_valid(button->gpio)) {
                        /*
index 3964f6e..7bee93e 100644 (file)
@@ -556,6 +556,7 @@ static int init_pwm(struct lm8323_chip *lm, int id, struct device *dev,
                    const char *name)
 {
        struct lm8323_pwm *pwm;
+       int err;
 
        BUG_ON(id > 3);
 
@@ -575,9 +576,11 @@ static int init_pwm(struct lm8323_chip *lm, int id, struct device *dev,
                pwm->cdev.name = name;
                pwm->cdev.brightness_set = lm8323_pwm_set_brightness;
                pwm->cdev.groups = lm8323_pwm_groups;
-               if (led_classdev_register(dev, &pwm->cdev) < 0) {
-                       dev_err(dev, "couldn't register PWM %d\n", id);
-                       return -1;
+
+               err = devm_led_classdev_register(dev, &pwm->cdev);
+               if (err) {
+                       dev_err(dev, "couldn't register PWM %d: %d\n", id, err);
+                       return err;
                }
                pwm->enabled = true;
        }
@@ -585,8 +588,6 @@ static int init_pwm(struct lm8323_chip *lm, int id, struct device *dev,
        return 0;
 }
 
-static struct i2c_driver lm8323_i2c_driver;
-
 static ssize_t lm8323_show_disable(struct device *dev,
                                   struct device_attribute *attr, char *buf)
 {
@@ -615,6 +616,12 @@ static ssize_t lm8323_set_disable(struct device *dev,
 }
 static DEVICE_ATTR(disable_kp, 0644, lm8323_show_disable, lm8323_set_disable);
 
+static struct attribute *lm8323_attrs[] = {
+       &dev_attr_disable_kp.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(lm8323);
+
 static int lm8323_probe(struct i2c_client *client)
 {
        struct lm8323_platform_data *pdata = dev_get_platdata(&client->dev);
@@ -642,12 +649,13 @@ static int lm8323_probe(struct i2c_client *client)
                return -EINVAL;
        }
 
-       lm = kzalloc(sizeof *lm, GFP_KERNEL);
-       idev = input_allocate_device();
-       if (!lm || !idev) {
-               err = -ENOMEM;
-               goto fail1;
-       }
+       lm = devm_kzalloc(&client->dev, sizeof(*lm), GFP_KERNEL);
+       if (!lm)
+               return -ENOMEM;
+
+       idev = devm_input_allocate_device(&client->dev);
+       if (!idev)
+               return -ENOMEM;
 
        lm->client = client;
        lm->idev = idev;
@@ -663,8 +671,10 @@ static int lm8323_probe(struct i2c_client *client)
 
        lm8323_reset(lm);
 
-       /* Nothing's set up to service the IRQ yet, so just spin for max.
-        * 100ms until we can configure. */
+       /*
+        * Nothing's set up to service the IRQ yet, so just spin for max.
+        * 100ms until we can configure.
+        */
        tmo = jiffies + msecs_to_jiffies(100);
        while (lm8323_read(lm, LM8323_CMD_READ_INT, data, 1) == 1) {
                if (data[0] & INT_NOINIT)
@@ -684,21 +694,17 @@ static int lm8323_probe(struct i2c_client *client)
        /* If a true probe check the device */
        if (lm8323_read_id(lm, data) != 0) {
                dev_err(&client->dev, "device not found\n");
-               err = -ENODEV;
-               goto fail1;
+               return -ENODEV;
        }
 
        for (pwm = 0; pwm < LM8323_NUM_PWMS; pwm++) {
                err = init_pwm(lm, pwm + 1, &client->dev,
                               pdata->pwm_names[pwm]);
-               if (err < 0)
-                       goto fail2;
+               if (err)
+                       return err;
        }
 
        lm->kp_enabled = true;
-       err = device_create_file(&client->dev, &dev_attr_disable_kp);
-       if (err < 0)
-               goto fail2;
 
        idev->name = pdata->name ? : "LM8323 keypad";
        snprintf(lm->phys, sizeof(lm->phys),
@@ -719,14 +725,16 @@ static int lm8323_probe(struct i2c_client *client)
        err = input_register_device(idev);
        if (err) {
                dev_dbg(&client->dev, "error registering input device\n");
-               goto fail3;
+               return err;
        }
 
-       err = request_threaded_irq(client->irq, NULL, lm8323_irq,
-                         IRQF_TRIGGER_LOW|IRQF_ONESHOT, "lm8323", lm);
+       err = devm_request_threaded_irq(&client->dev, client->irq,
+                                       NULL, lm8323_irq,
+                                       IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                       "lm8323", lm);
        if (err) {
                dev_err(&client->dev, "could not get IRQ %d\n", client->irq);
-               goto fail4;
+               return err;
        }
 
        i2c_set_clientdata(client, lm);
@@ -735,39 +743,6 @@ static int lm8323_probe(struct i2c_client *client)
        enable_irq_wake(client->irq);
 
        return 0;
-
-fail4:
-       input_unregister_device(idev);
-       idev = NULL;
-fail3:
-       device_remove_file(&client->dev, &dev_attr_disable_kp);
-fail2:
-       while (--pwm >= 0)
-               if (lm->pwm[pwm].enabled)
-                       led_classdev_unregister(&lm->pwm[pwm].cdev);
-fail1:
-       input_free_device(idev);
-       kfree(lm);
-       return err;
-}
-
-static void lm8323_remove(struct i2c_client *client)
-{
-       struct lm8323_chip *lm = i2c_get_clientdata(client);
-       int i;
-
-       disable_irq_wake(client->irq);
-       free_irq(client->irq, lm);
-
-       input_unregister_device(lm->idev);
-
-       device_remove_file(&lm->client->dev, &dev_attr_disable_kp);
-
-       for (i = 0; i < 3; i++)
-               if (lm->pwm[i].enabled)
-                       led_classdev_unregister(&lm->pwm[i].cdev);
-
-       kfree(lm);
 }
 
 /*
@@ -823,11 +798,11 @@ static const struct i2c_device_id lm8323_id[] = {
 
 static struct i2c_driver lm8323_i2c_driver = {
        .driver = {
-               .name   = "lm8323",
-               .pm     = pm_sleep_ptr(&lm8323_pm_ops),
+               .name           = "lm8323",
+               .pm             = pm_sleep_ptr(&lm8323_pm_ops),
+               .dev_groups     = lm8323_groups,
        },
        .probe          = lm8323_probe,
-       .remove         = lm8323_remove,
        .id_table       = lm8323_id,
 };
 MODULE_DEVICE_TABLE(i2c, lm8323_id);
index c9f0576..1c070c4 100644 (file)
@@ -142,18 +142,18 @@ static int lm8333_probe(struct i2c_client *client)
                return -EINVAL;
        }
 
-       lm8333 = kzalloc(sizeof(*lm8333), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!lm8333 || !input) {
-               err = -ENOMEM;
-               goto free_mem;
-       }
+       lm8333 = devm_kzalloc(&client->dev, sizeof(*lm8333), GFP_KERNEL);
+       if (!lm8333)
+               return -ENOMEM;
+
+       input = devm_input_allocate_device(&client->dev);
+       if (!input)
+               return -ENOMEM;
 
        lm8333->client = client;
        lm8333->input = input;
 
        input->name = client->name;
-       input->dev.parent = &client->dev;
        input->id.bustype = BUS_I2C;
 
        input_set_capability(input, EV_MSC, MSC_SCAN);
@@ -162,7 +162,7 @@ static int lm8333_probe(struct i2c_client *client)
                                         LM8333_NUM_ROWS, LM8333_NUM_COLS,
                                         lm8333->keycodes, input);
        if (err)
-               goto free_mem;
+               return err;
 
        if (pdata->debounce_time) {
                err = lm8333_write8(lm8333, LM8333_DEBOUNCE,
@@ -178,34 +178,19 @@ static int lm8333_probe(struct i2c_client *client)
                        dev_warn(&client->dev, "Unable to set active time\n");
        }
 
-       err = request_threaded_irq(client->irq, NULL, lm8333_irq_thread,
-                                  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-                                  "lm8333", lm8333);
+       err = devm_request_threaded_irq(&client->dev, client->irq,
+                                       NULL, lm8333_irq_thread,
+                                       IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+                                       "lm8333", lm8333);
        if (err)
-               goto free_mem;
+               return err;
 
        err = input_register_device(input);
        if (err)
-               goto free_irq;
+               return err;
 
        i2c_set_clientdata(client, lm8333);
        return 0;
-
- free_irq:
-       free_irq(client->irq, lm8333);
- free_mem:
-       input_free_device(input);
-       kfree(lm8333);
-       return err;
-}
-
-static void lm8333_remove(struct i2c_client *client)
-{
-       struct lm8333 *lm8333 = i2c_get_clientdata(client);
-
-       free_irq(client->irq, lm8333);
-       input_unregister_device(lm8333->input);
-       kfree(lm8333);
 }
 
 static const struct i2c_device_id lm8333_id[] = {
@@ -219,7 +204,6 @@ static struct i2c_driver lm8333_driver = {
                .name           = "lm8333",
        },
        .probe          = lm8333_probe,
-       .remove         = lm8333_remove,
        .id_table       = lm8333_id,
 };
 module_i2c_driver(lm8333_driver);
index 911e118..322a878 100644 (file)
@@ -160,17 +160,10 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev)
 {
        struct lpc32xx_kscan_drv *kscandat;
        struct input_dev *input;
-       struct resource *res;
        size_t keymap_size;
        int error;
        int irq;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "failed to get platform I/O memory\n");
-               return -EINVAL;
-       }
-
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return -EINVAL;
@@ -221,7 +214,7 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev)
 
        input_set_drvdata(kscandat->input, kscandat);
 
-       kscandat->kscan_base = devm_ioremap_resource(&pdev->dev, res);
+       kscandat->kscan_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(kscandat->kscan_base))
                return PTR_ERR(kscandat->kscan_base);
 
index de312d8..2410f67 100644 (file)
@@ -92,6 +92,13 @@ static irqreturn_t mcs_touchkey_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static void mcs_touchkey_poweroff(void *data)
+{
+       struct mcs_touchkey_data *touchkey = data;
+
+       touchkey->poweron(false);
+}
+
 static int mcs_touchkey_probe(struct i2c_client *client)
 {
        const struct i2c_device_id *id = i2c_client_get_device_id(client);
@@ -109,13 +116,16 @@ static int mcs_touchkey_probe(struct i2c_client *client)
                return -EINVAL;
        }
 
-       data = kzalloc(struct_size(data, keycodes, pdata->key_maxval + 1),
-                      GFP_KERNEL);
-       input_dev = input_allocate_device();
-       if (!data || !input_dev) {
-               dev_err(&client->dev, "Failed to allocate memory\n");
-               error = -ENOMEM;
-               goto err_free_mem;
+       data = devm_kzalloc(&client->dev,
+                           struct_size(data, keycodes, pdata->key_maxval + 1),
+                           GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       input_dev = devm_input_allocate_device(&client->dev);
+       if (!input_dev) {
+               dev_err(&client->dev, "Failed to allocate input device\n");
+               return -ENOMEM;
        }
 
        data->client = client;
@@ -136,15 +146,13 @@ static int mcs_touchkey_probe(struct i2c_client *client)
 
        fw_ver = i2c_smbus_read_byte_data(client, fw_reg);
        if (fw_ver < 0) {
-               error = fw_ver;
-               dev_err(&client->dev, "i2c read error[%d]\n", error);
-               goto err_free_mem;
+               dev_err(&client->dev, "i2c read error[%d]\n", fw_ver);
+               return fw_ver;
        }
        dev_info(&client->dev, "Firmware version: %d\n", fw_ver);
 
        input_dev->name = "MELFAS MCS Touchkey";
        input_dev->id.bustype = BUS_I2C;
-       input_dev->dev.parent = &client->dev;
        input_dev->evbit[0] = BIT_MASK(EV_KEY);
        if (!pdata->no_autorepeat)
                input_dev->evbit[0] |= BIT_MASK(EV_REP);
@@ -169,40 +177,28 @@ static int mcs_touchkey_probe(struct i2c_client *client)
        if (pdata->poweron) {
                data->poweron = pdata->poweron;
                data->poweron(true);
+
+               error = devm_add_action_or_reset(&client->dev,
+                                                mcs_touchkey_poweroff, data);
+               if (error)
+                       return error;
        }
 
-       error = request_threaded_irq(client->irq, NULL, mcs_touchkey_interrupt,
-                                    IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-                                    client->dev.driver->name, data);
+       error = devm_request_threaded_irq(&client->dev, client->irq,
+                                         NULL, mcs_touchkey_interrupt,
+                                         IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+                                         client->dev.driver->name, data);
        if (error) {
                dev_err(&client->dev, "Failed to register interrupt\n");
-               goto err_free_mem;
+               return error;
        }
 
        error = input_register_device(input_dev);
        if (error)
-               goto err_free_irq;
+               return error;
 
        i2c_set_clientdata(client, data);
        return 0;
-
-err_free_irq:
-       free_irq(client->irq, data);
-err_free_mem:
-       input_free_device(input_dev);
-       kfree(data);
-       return error;
-}
-
-static void mcs_touchkey_remove(struct i2c_client *client)
-{
-       struct mcs_touchkey_data *data = i2c_get_clientdata(client);
-
-       free_irq(client->irq, data);
-       if (data->poweron)
-               data->poweron(false);
-       input_unregister_device(data->input_dev);
-       kfree(data);
 }
 
 static void mcs_touchkey_shutdown(struct i2c_client *client)
@@ -259,7 +255,6 @@ static struct i2c_driver mcs_touchkey_driver = {
                .pm     = pm_sleep_ptr(&mcs_touchkey_pm_ops),
        },
        .probe          = mcs_touchkey_probe,
-       .remove         = mcs_touchkey_remove,
        .shutdown       = mcs_touchkey_shutdown,
        .id_table       = mcs_touchkey_id,
 };
index 970f2a6..b3ccc97 100644 (file)
@@ -221,13 +221,20 @@ static irqreturn_t ske_keypad_irq(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static void ske_keypad_board_exit(void *data)
+{
+       struct ske_keypad *keypad = data;
+
+       keypad->board->exit();
+}
+
 static int __init ske_keypad_probe(struct platform_device *pdev)
 {
        const struct ske_keypad_platform_data *plat =
                        dev_get_platdata(&pdev->dev);
+       struct device *dev = &pdev->dev;
        struct ske_keypad *keypad;
        struct input_dev *input;
-       struct resource *res;
        int irq;
        int error;
 
@@ -238,20 +245,14 @@ static int __init ske_keypad_probe(struct platform_device *pdev)
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
-               return -EINVAL;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "missing platform resources\n");
-               return -EINVAL;
-       }
+               return irq;
 
-       keypad = kzalloc(sizeof(struct ske_keypad), GFP_KERNEL);
-       input = input_allocate_device();
+       keypad = devm_kzalloc(dev, sizeof(struct ske_keypad),
+                             GFP_KERNEL);
+       input = devm_input_allocate_device(dev);
        if (!keypad || !input) {
                dev_err(&pdev->dev, "failed to allocate keypad memory\n");
-               error = -ENOMEM;
-               goto err_free_mem;
+               return -ENOMEM;
        }
 
        keypad->irq = irq;
@@ -259,31 +260,20 @@ static int __init ske_keypad_probe(struct platform_device *pdev)
        keypad->input = input;
        spin_lock_init(&keypad->ske_keypad_lock);
 
-       if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
-               dev_err(&pdev->dev, "failed to request I/O memory\n");
-               error = -EBUSY;
-               goto err_free_mem;
-       }
-
-       keypad->reg_base = ioremap(res->start, resource_size(res));
-       if (!keypad->reg_base) {
-               dev_err(&pdev->dev, "failed to remap I/O memory\n");
-               error = -ENXIO;
-               goto err_free_mem_region;
-       }
+       keypad->reg_base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(keypad->reg_base))
+               return PTR_ERR(keypad->reg_base);
 
-       keypad->pclk = clk_get(&pdev->dev, "apb_pclk");
+       keypad->pclk = devm_clk_get_enabled(dev, "apb_pclk");
        if (IS_ERR(keypad->pclk)) {
                dev_err(&pdev->dev, "failed to get pclk\n");
-               error = PTR_ERR(keypad->pclk);
-               goto err_iounmap;
+               return PTR_ERR(keypad->pclk);
        }
 
-       keypad->clk = clk_get(&pdev->dev, NULL);
+       keypad->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(keypad->clk)) {
                dev_err(&pdev->dev, "failed to get clk\n");
-               error = PTR_ERR(keypad->clk);
-               goto err_pclk;
+               return PTR_ERR(keypad->clk);
        }
 
        input->id.bustype = BUS_HOST;
@@ -295,48 +285,43 @@ static int __init ske_keypad_probe(struct platform_device *pdev)
                                           keypad->keymap, input);
        if (error) {
                dev_err(&pdev->dev, "Failed to build keymap\n");
-               goto err_clk;
+               return error;
        }
 
        input_set_capability(input, EV_MSC, MSC_SCAN);
        if (!plat->no_autorepeat)
                __set_bit(EV_REP, input->evbit);
 
-       error = clk_prepare_enable(keypad->pclk);
-       if (error) {
-               dev_err(&pdev->dev, "Failed to prepare/enable pclk\n");
-               goto err_clk;
-       }
-
-       error = clk_prepare_enable(keypad->clk);
-       if (error) {
-               dev_err(&pdev->dev, "Failed to prepare/enable clk\n");
-               goto err_pclk_disable;
-       }
-
-
        /* go through board initialization helpers */
        if (keypad->board->init)
                keypad->board->init();
 
+       if (keypad->board->exit) {
+               error = devm_add_action_or_reset(dev, ske_keypad_board_exit,
+                                                keypad);
+               if (error)
+                       return error;
+       }
+
        error = ske_keypad_chip_init(keypad);
        if (error) {
                dev_err(&pdev->dev, "unable to init keypad hardware\n");
-               goto err_clk_disable;
+               return error;
        }
 
-       error = request_threaded_irq(keypad->irq, NULL, ske_keypad_irq,
-                                    IRQF_ONESHOT, "ske-keypad", keypad);
+       error = devm_request_threaded_irq(dev, keypad->irq,
+                                         NULL, ske_keypad_irq,
+                                         IRQF_ONESHOT, "ske-keypad", keypad);
        if (error) {
                dev_err(&pdev->dev, "allocate irq %d failed\n", keypad->irq);
-               goto err_clk_disable;
+               return error;
        }
 
        error = input_register_device(input);
        if (error) {
                dev_err(&pdev->dev,
-                               "unable to register input device: %d\n", error);
-               goto err_free_irq;
+                       "unable to register input device: %d\n", error);
+               return error;
        }
 
        if (plat->wakeup_enable)
@@ -345,47 +330,6 @@ static int __init ske_keypad_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, keypad);
 
        return 0;
-
-err_free_irq:
-       free_irq(keypad->irq, keypad);
-err_clk_disable:
-       clk_disable_unprepare(keypad->clk);
-err_pclk_disable:
-       clk_disable_unprepare(keypad->pclk);
-err_clk:
-       clk_put(keypad->clk);
-err_pclk:
-       clk_put(keypad->pclk);
-err_iounmap:
-       iounmap(keypad->reg_base);
-err_free_mem_region:
-       release_mem_region(res->start, resource_size(res));
-err_free_mem:
-       input_free_device(input);
-       kfree(keypad);
-       return error;
-}
-
-static int ske_keypad_remove(struct platform_device *pdev)
-{
-       struct ske_keypad *keypad = platform_get_drvdata(pdev);
-       struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-       free_irq(keypad->irq, keypad);
-
-       input_unregister_device(keypad->input);
-
-       clk_disable_unprepare(keypad->clk);
-       clk_put(keypad->clk);
-
-       if (keypad->board->exit)
-               keypad->board->exit();
-
-       iounmap(keypad->reg_base);
-       release_mem_region(res->start, resource_size(res));
-       kfree(keypad);
-
-       return 0;
 }
 
 static int ske_keypad_suspend(struct device *dev)
@@ -424,7 +368,6 @@ static struct platform_driver ske_keypad_driver = {
                .name = "nmk-ske-keypad",
                .pm = pm_sleep_ptr(&ske_keypad_dev_pm_ops),
        },
-       .remove = ske_keypad_remove,
 };
 
 module_platform_driver_probe(ske_keypad_driver, ske_keypad_probe);
index e9fa142..096c18d 100644 (file)
@@ -186,8 +186,7 @@ static int nspire_keypad_probe(struct platform_device *pdev)
                return PTR_ERR(keypad->clk);
        }
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       keypad->reg_base = devm_ioremap_resource(&pdev->dev, res);
+       keypad->reg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(keypad->reg_base))
                return PTR_ERR(keypad->reg_base);
 
index 9f085d5..773e55e 100644 (file)
@@ -341,17 +341,10 @@ static int omap4_keypad_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct omap4_keypad *keypad_data;
        struct input_dev *input_dev;
-       struct resource *res;
        unsigned int max_keys;
        int irq;
        int error;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "no base address specified\n");
-               return -EINVAL;
-       }
-
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;
@@ -370,7 +363,7 @@ static int omap4_keypad_probe(struct platform_device *pdev)
        if (error)
                return error;
 
-       keypad_data->base = devm_ioremap_resource(dev, res);
+       keypad_data->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(keypad_data->base))
                return PTR_ERR(keypad_data->base);
 
index b0ea387..7ffe1a7 100644 (file)
@@ -39,15 +39,8 @@ static int opencores_kbd_probe(struct platform_device *pdev)
 {
        struct input_dev *input;
        struct opencores_kbd *opencores_kbd;
-       struct resource *res;
        int irq, i, error;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "missing board memory resource\n");
-               return -EINVAL;
-       }
-
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return -EINVAL;
@@ -65,7 +58,7 @@ static int opencores_kbd_probe(struct platform_device *pdev)
 
        opencores_kbd->input = input;
 
-       opencores_kbd->addr = devm_ioremap_resource(&pdev->dev, res);
+       opencores_kbd->addr = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(opencores_kbd->addr))
                return PTR_ERR(opencores_kbd->addr);
 
index 038ff35..147b1f2 100644 (file)
@@ -318,40 +318,22 @@ static void ppkb_close(struct input_dev *input)
        ppkb_set_scan(client, false);
 }
 
-static void ppkb_regulator_disable(void *regulator)
-{
-       regulator_disable(regulator);
-}
-
 static int ppkb_probe(struct i2c_client *client)
 {
        struct device *dev = &client->dev;
        unsigned int phys_rows, phys_cols;
        struct pinephone_keyboard *ppkb;
-       struct regulator *vbat_supply;
        u8 info[PPKB_MATRIX_SIZE + 1];
        struct device_node *i2c_bus;
        int ret;
        int error;
 
-       vbat_supply = devm_regulator_get(dev, "vbat");
-       error = PTR_ERR_OR_ZERO(vbat_supply);
+       error = devm_regulator_get_enable(dev, "vbat");
        if (error) {
                dev_err(dev, "Failed to get VBAT supply: %d\n", error);
                return error;
        }
 
-       error = regulator_enable(vbat_supply);
-       if (error) {
-               dev_err(dev, "Failed to enable VBAT: %d\n", error);
-               return error;
-       }
-
-       error = devm_add_action_or_reset(dev, ppkb_regulator_disable,
-                                        vbat_supply);
-       if (error)
-               return error;
-
        ret = i2c_smbus_read_i2c_block_data(client, 0, sizeof(info), info);
        if (ret != sizeof(info)) {
                error = ret < 0 ? ret : -EIO;
index 871f858..3724363 100644 (file)
@@ -717,7 +717,6 @@ static int pxa27x_keypad_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct pxa27x_keypad *keypad;
        struct input_dev *input_dev;
-       struct resource *res;
        int irq, error;
 
        /* Driver need build keycode from device tree or pdata */
@@ -728,12 +727,6 @@ static int pxa27x_keypad_probe(struct platform_device *pdev)
        if (irq < 0)
                return -ENXIO;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (res == NULL) {
-               dev_err(&pdev->dev, "failed to get I/O memory\n");
-               return -ENXIO;
-       }
-
        keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad),
                              GFP_KERNEL);
        if (!keypad)
@@ -747,7 +740,7 @@ static int pxa27x_keypad_probe(struct platform_device *pdev)
        keypad->input_dev = input_dev;
        keypad->irq = irq;
 
-       keypad->mmio_base = devm_ioremap_resource(&pdev->dev, res);
+       keypad->mmio_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(keypad->mmio_base))
                return PTR_ERR(keypad->mmio_base);
 
index 91aaa9f..9b093b0 100644 (file)
@@ -149,20 +149,20 @@ static int qt1070_probe(struct i2c_client *client)
        if (!qt1070_identify(client))
                return -ENODEV;
 
-       data = kzalloc(sizeof(struct qt1070_data), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!data || !input) {
-               dev_err(&client->dev, "insufficient memory\n");
-               err = -ENOMEM;
-               goto err_free_mem;
-       }
+       data = devm_kzalloc(&client->dev, sizeof(struct qt1070_data),
+                           GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       input = devm_input_allocate_device(&client->dev);
+       if (!input)
+               return -ENOMEM;
 
        data->client = client;
        data->input = input;
        data->irq = client->irq;
 
        input->name = "AT42QT1070 QTouch Sensor";
-       input->dev.parent = &client->dev;
        input->id.bustype = BUS_I2C;
 
        /* Add the keycode */
@@ -185,19 +185,20 @@ static int qt1070_probe(struct i2c_client *client)
        qt1070_write(client, RESET, 1);
        msleep(QT1070_RESET_TIME);
 
-       err = request_threaded_irq(client->irq, NULL, qt1070_interrupt,
-                                  IRQF_TRIGGER_NONE | IRQF_ONESHOT,
-                                  client->dev.driver->name, data);
+       err = devm_request_threaded_irq(&client->dev, client->irq,
+                                       NULL, qt1070_interrupt,
+                                       IRQF_TRIGGER_NONE | IRQF_ONESHOT,
+                                       client->dev.driver->name, data);
        if (err) {
                dev_err(&client->dev, "fail to request irq\n");
-               goto err_free_mem;
+               return err;
        }
 
        /* Register the input device */
        err = input_register_device(data->input);
        if (err) {
                dev_err(&client->dev, "Failed to register input device\n");
-               goto err_free_irq;
+               return err;
        }
 
        i2c_set_clientdata(client, data);
@@ -206,24 +207,6 @@ static int qt1070_probe(struct i2c_client *client)
        qt1070_read(client, DET_STATUS);
 
        return 0;
-
-err_free_irq:
-       free_irq(client->irq, data);
-err_free_mem:
-       input_free_device(input);
-       kfree(data);
-       return err;
-}
-
-static void qt1070_remove(struct i2c_client *client)
-{
-       struct qt1070_data *data = i2c_get_clientdata(client);
-
-       /* Release IRQ */
-       free_irq(client->irq, data);
-
-       input_unregister_device(data->input);
-       kfree(data);
 }
 
 static int qt1070_suspend(struct device *dev)
@@ -272,7 +255,6 @@ static struct i2c_driver qt1070_driver = {
        },
        .id_table       = qt1070_id,
        .probe          = qt1070_probe,
-       .remove         = qt1070_remove,
 };
 
 module_i2c_driver(qt1070_driver);
index 599ea85..7e3b096 100644 (file)
@@ -32,7 +32,7 @@
 
 #define QT2160_NUM_LEDS_X      8
 
-#define QT2160_CYCLE_INTERVAL  (2*HZ)
+#define QT2160_CYCLE_INTERVAL  2000 /* msec - 2 sec */
 
 static unsigned char qt2160_key2code[] = {
        KEY_0, KEY_1, KEY_2, KEY_3,
@@ -54,7 +54,6 @@ struct qt2160_led {
 struct qt2160_data {
        struct i2c_client *client;
        struct input_dev *input;
-       struct delayed_work dwork;
        unsigned short keycodes[ARRAY_SIZE(qt2160_key2code)];
        u16 key_matrix;
 #ifdef CONFIG_LEDS_CLASS
@@ -155,10 +154,10 @@ static int qt2160_read_block(struct i2c_client *client,
        return 0;
 }
 
-static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
+static void qt2160_get_key_matrix(struct input_dev *input)
 {
+       struct qt2160_data *qt2160 = input_get_drvdata(input);
        struct i2c_client *client = qt2160->client;
-       struct input_dev *input = qt2160->input;
        u8 regs[6];
        u16 old_matrix, new_matrix;
        int ret, i, mask;
@@ -173,7 +172,7 @@ static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
        if (ret) {
                dev_err(&client->dev,
                        "could not perform chip read.\n");
-               return ret;
+               return;
        }
 
        old_matrix = qt2160->key_matrix;
@@ -191,37 +190,17 @@ static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
        }
 
        input_sync(input);
-
-       return 0;
 }
 
-static irqreturn_t qt2160_irq(int irq, void *_qt2160)
+static irqreturn_t qt2160_irq(int irq, void *data)
 {
-       struct qt2160_data *qt2160 = _qt2160;
+       struct input_dev *input = data;
 
-       mod_delayed_work(system_wq, &qt2160->dwork, 0);
+       qt2160_get_key_matrix(input);
 
        return IRQ_HANDLED;
 }
 
-static void qt2160_schedule_read(struct qt2160_data *qt2160)
-{
-       schedule_delayed_work(&qt2160->dwork, QT2160_CYCLE_INTERVAL);
-}
-
-static void qt2160_worker(struct work_struct *work)
-{
-       struct qt2160_data *qt2160 =
-               container_of(work, struct qt2160_data, dwork.work);
-
-       dev_dbg(&qt2160->client->dev, "worker\n");
-
-       qt2160_get_key_matrix(qt2160);
-
-       /* Avoid device lock up by checking every so often */
-       qt2160_schedule_read(qt2160);
-}
-
 static int qt2160_read(struct i2c_client *client, u8 reg)
 {
        int ret;
@@ -260,7 +239,7 @@ static int qt2160_write(struct i2c_client *client, u8 reg, u8 data)
 static int qt2160_register_leds(struct qt2160_data *qt2160)
 {
        struct i2c_client *client = qt2160->client;
-       int ret;
+       int error;
        int i;
 
        for (i = 0; i < QT2160_NUM_LEDS_X; i++) {
@@ -273,9 +252,9 @@ static int qt2160_register_leds(struct qt2160_data *qt2160)
                led->id = i;
                led->qt2160 = qt2160;
 
-               ret = led_classdev_register(&client->dev, &led->cdev);
-               if (ret < 0)
-                       return ret;
+               error = devm_led_classdev_register(&client->dev, &led->cdev);
+               if (error)
+                       return error;
        }
 
        /* Tur off LEDs */
@@ -286,14 +265,6 @@ static int qt2160_register_leds(struct qt2160_data *qt2160)
        return 0;
 }
 
-static void qt2160_unregister_leds(struct qt2160_data *qt2160)
-{
-       int i;
-
-       for (i = 0; i < QT2160_NUM_LEDS_X; i++)
-               led_classdev_unregister(&qt2160->leds[i].cdev);
-}
-
 #else
 
 static inline int qt2160_register_leds(struct qt2160_data *qt2160)
@@ -301,10 +272,6 @@ static inline int qt2160_register_leds(struct qt2160_data *qt2160)
        return 0;
 }
 
-static inline void qt2160_unregister_leds(struct qt2160_data *qt2160)
-{
-}
-
 #endif
 
 static bool qt2160_identify(struct i2c_client *client)
@@ -345,12 +312,9 @@ static int qt2160_probe(struct i2c_client *client)
        int i;
        int error;
 
-       /* Check functionality */
-       error = i2c_check_functionality(client->adapter,
-                       I2C_FUNC_SMBUS_BYTE);
-       if (!error) {
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE)) {
                dev_err(&client->dev, "%s adapter not supported\n",
-                               dev_driver_string(&client->adapter->dev));
+                       dev_driver_string(&client->adapter->dev));
                return -ENODEV;
        }
 
@@ -358,17 +322,16 @@ static int qt2160_probe(struct i2c_client *client)
                return -ENODEV;
 
        /* Chip is valid and active. Allocate structure */
-       qt2160 = kzalloc(sizeof(struct qt2160_data), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!qt2160 || !input) {
-               dev_err(&client->dev, "insufficient memory\n");
-               error = -ENOMEM;
-               goto err_free_mem;
-       }
+       qt2160 = devm_kzalloc(&client->dev, sizeof(*qt2160), GFP_KERNEL);
+       if (!qt2160)
+               return -ENOMEM;
+
+       input = devm_input_allocate_device(&client->dev);
+       if (!input)
+               return -ENOMEM;
 
        qt2160->client = client;
        qt2160->input = input;
-       INIT_DELAYED_WORK(&qt2160->dwork, qt2160_worker);
 
        input->name = "AT42QT2160 Touch Sense Keyboard";
        input->id.bustype = BUS_I2C;
@@ -385,66 +348,48 @@ static int qt2160_probe(struct i2c_client *client)
        }
        __clear_bit(KEY_RESERVED, input->keybit);
 
+       input_set_drvdata(input, qt2160);
+
        /* Calibrate device */
        error = qt2160_write(client, QT2160_CMD_CALIBRATE, 1);
        if (error) {
                dev_err(&client->dev, "failed to calibrate device\n");
-               goto err_free_mem;
+               return error;
        }
 
        if (client->irq) {
-               error = request_irq(client->irq, qt2160_irq,
-                                   IRQF_TRIGGER_FALLING, "qt2160", qt2160);
+               error = devm_request_threaded_irq(&client->dev, client->irq,
+                                                 NULL, qt2160_irq,
+                                                 IRQF_ONESHOT,
+                                                 "qt2160", input);
                if (error) {
                        dev_err(&client->dev,
                                "failed to allocate irq %d\n", client->irq);
-                       goto err_free_mem;
+                       return error;
+               }
+       } else {
+               error = input_setup_polling(input, qt2160_get_key_matrix);
+               if (error) {
+                       dev_err(&client->dev, "Failed to setup polling\n");
+                       return error;
                }
+               input_set_poll_interval(input, QT2160_CYCLE_INTERVAL);
        }
 
        error = qt2160_register_leds(qt2160);
        if (error) {
                dev_err(&client->dev, "Failed to register leds\n");
-               goto err_free_irq;
+               return error;
        }
 
        error = input_register_device(qt2160->input);
        if (error) {
                dev_err(&client->dev,
                        "Failed to register input device\n");
-               goto err_unregister_leds;
+               return error;
        }
 
-       i2c_set_clientdata(client, qt2160);
-       qt2160_schedule_read(qt2160);
-
        return 0;
-
-err_unregister_leds:
-       qt2160_unregister_leds(qt2160);
-err_free_irq:
-       if (client->irq)
-               free_irq(client->irq, qt2160);
-err_free_mem:
-       input_free_device(input);
-       kfree(qt2160);
-       return error;
-}
-
-static void qt2160_remove(struct i2c_client *client)
-{
-       struct qt2160_data *qt2160 = i2c_get_clientdata(client);
-
-       qt2160_unregister_leds(qt2160);
-
-       /* Release IRQ so no queue will be scheduled */
-       if (client->irq)
-               free_irq(client->irq, qt2160);
-
-       cancel_delayed_work_sync(&qt2160->dwork);
-
-       input_unregister_device(qt2160->input);
-       kfree(qt2160);
 }
 
 static const struct i2c_device_id qt2160_idtable[] = {
@@ -461,7 +406,6 @@ static struct i2c_driver qt2160_driver = {
 
        .id_table       = qt2160_idtable,
        .probe          = qt2160_probe,
-       .remove         = qt2160_remove,
 };
 
 module_i2c_driver(qt2160_driver);
index 15c15c0..f304cab 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
 #include <linux/pm_wakeup.h>
+#include <linux/property.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
@@ -307,8 +308,7 @@ static int sun4i_lradc_probe(struct platform_device *pdev)
 
        input_set_drvdata(lradc->input, lradc);
 
-       lradc->base = devm_ioremap_resource(dev,
-                             platform_get_resource(pdev, IORESOURCE_MEM, 0));
+       lradc->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(lradc->base))
                return PTR_ERR(lradc->base);
 
index 2f745ca..8af59ce 100644 (file)
@@ -24,6 +24,8 @@
 #define TCA6416_INVERT         2
 #define TCA6416_DIRECTION      3
 
+#define TCA6416_POLL_INTERVAL  100 /* msec */
+
 static const struct i2c_device_id tca6416_id[] = {
        { "tca6416-keys", 16, },
        { "tca6408-keys", 8, },
@@ -43,7 +45,6 @@ struct tca6416_keypad_chip {
 
        struct i2c_client *client;
        struct input_dev *input;
-       struct delayed_work dwork;
        int io_size;
        int irqnum;
        u16 pinmask;
@@ -85,9 +86,9 @@ static int tca6416_read_reg(struct tca6416_keypad_chip *chip, int reg, u16 *val)
        return 0;
 }
 
-static void tca6416_keys_scan(struct tca6416_keypad_chip *chip)
+static void tca6416_keys_scan(struct input_dev *input)
 {
-       struct input_dev *input = chip->input;
+       struct tca6416_keypad_chip *chip = input_get_drvdata(input);
        u16 reg_val, val;
        int error, i, pin_index;
 
@@ -122,33 +123,20 @@ static void tca6416_keys_scan(struct tca6416_keypad_chip *chip)
  */
 static irqreturn_t tca6416_keys_isr(int irq, void *dev_id)
 {
-       struct tca6416_keypad_chip *chip = dev_id;
-
-       tca6416_keys_scan(chip);
+       tca6416_keys_scan(dev_id);
 
        return IRQ_HANDLED;
 }
 
-static void tca6416_keys_work_func(struct work_struct *work)
-{
-       struct tca6416_keypad_chip *chip =
-               container_of(work, struct tca6416_keypad_chip, dwork.work);
-
-       tca6416_keys_scan(chip);
-       schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
-}
-
 static int tca6416_keys_open(struct input_dev *dev)
 {
        struct tca6416_keypad_chip *chip = input_get_drvdata(dev);
 
-       /* Get initial device state in case it has switches */
-       tca6416_keys_scan(chip);
-
-       if (chip->use_polling)
-               schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
-       else
-               enable_irq(chip->irqnum);
+       if (!chip->use_polling) {
+               /* Get initial device state in case it has switches */
+               tca6416_keys_scan(dev);
+               enable_irq(chip->client->irq);
+       }
 
        return 0;
 }
@@ -157,10 +145,8 @@ static void tca6416_keys_close(struct input_dev *dev)
 {
        struct tca6416_keypad_chip *chip = input_get_drvdata(dev);
 
-       if (chip->use_polling)
-               cancel_delayed_work_sync(&chip->dwork);
-       else
-               disable_irq(chip->irqnum);
+       if (!chip->use_polling)
+               disable_irq(chip->client->irq);
 }
 
 static int tca6416_setup_registers(struct tca6416_keypad_chip *chip)
@@ -216,12 +202,15 @@ static int tca6416_keypad_probe(struct i2c_client *client)
                return -EINVAL;
        }
 
-       chip = kzalloc(struct_size(chip, buttons, pdata->nbuttons), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!chip || !input) {
-               error = -ENOMEM;
-               goto fail1;
-       }
+       chip = devm_kzalloc(&client->dev,
+                           struct_size(chip, buttons, pdata->nbuttons),
+                           GFP_KERNEL);
+       if (!chip)
+               return -ENOMEM;
+
+       input = devm_input_allocate_device(&client->dev);
+       if (!input)
+               return -ENOMEM;
 
        chip->client = client;
        chip->input = input;
@@ -229,11 +218,8 @@ static int tca6416_keypad_probe(struct i2c_client *client)
        chip->pinmask = pdata->pinmask;
        chip->use_polling = pdata->use_polling;
 
-       INIT_DELAYED_WORK(&chip->dwork, tca6416_keys_work_func);
-
        input->phys = "tca6416-keys/input0";
        input->name = client->name;
-       input->dev.parent = &client->dev;
 
        input->open = tca6416_keys_open;
        input->close = tca6416_keys_close;
@@ -263,24 +249,28 @@ static int tca6416_keypad_probe(struct i2c_client *client)
         */
        error = tca6416_setup_registers(chip);
        if (error)
-               goto fail1;
+               return error;
 
-       if (!chip->use_polling) {
-               if (pdata->irq_is_gpio)
-                       chip->irqnum = gpio_to_irq(client->irq);
-               else
-                       chip->irqnum = client->irq;
-
-               error = request_threaded_irq(chip->irqnum, NULL,
-                                            tca6416_keys_isr,
-                                            IRQF_TRIGGER_FALLING |
-                                            IRQF_ONESHOT | IRQF_NO_AUTOEN,
-                                            "tca6416-keypad", chip);
+       if (chip->use_polling) {
+               error = input_setup_polling(input, tca6416_keys_scan);
+               if (error) {
+                       dev_err(&client->dev, "Failed to setup polling\n");
+                       return error;
+               }
+
+               input_set_poll_interval(input, TCA6416_POLL_INTERVAL);
+       } else {
+               error = devm_request_threaded_irq(&client->dev, client->irq,
+                                                 NULL, tca6416_keys_isr,
+                                                 IRQF_TRIGGER_FALLING |
+                                                       IRQF_ONESHOT |
+                                                       IRQF_NO_AUTOEN,
+                                                 "tca6416-keypad", input);
                if (error) {
                        dev_dbg(&client->dev,
                                "Unable to claim irq %d; error %d\n",
-                               chip->irqnum, error);
-                       goto fail1;
+                               client->irq, error);
+                       return error;
                }
        }
 
@@ -288,70 +278,19 @@ static int tca6416_keypad_probe(struct i2c_client *client)
        if (error) {
                dev_dbg(&client->dev,
                        "Unable to register input device, error: %d\n", error);
-               goto fail2;
+               return error;
        }
 
        i2c_set_clientdata(client, chip);
-       device_init_wakeup(&client->dev, 1);
 
        return 0;
-
-fail2:
-       if (!chip->use_polling) {
-               free_irq(chip->irqnum, chip);
-               enable_irq(chip->irqnum);
-       }
-fail1:
-       input_free_device(input);
-       kfree(chip);
-       return error;
 }
 
-static void tca6416_keypad_remove(struct i2c_client *client)
-{
-       struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
-
-       if (!chip->use_polling) {
-               free_irq(chip->irqnum, chip);
-               enable_irq(chip->irqnum);
-       }
-
-       input_unregister_device(chip->input);
-       kfree(chip);
-}
-
-static int tca6416_keypad_suspend(struct device *dev)
-{
-       struct i2c_client *client = to_i2c_client(dev);
-       struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
-
-       if (device_may_wakeup(dev))
-               enable_irq_wake(chip->irqnum);
-
-       return 0;
-}
-
-static int tca6416_keypad_resume(struct device *dev)
-{
-       struct i2c_client *client = to_i2c_client(dev);
-       struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
-
-       if (device_may_wakeup(dev))
-               disable_irq_wake(chip->irqnum);
-
-       return 0;
-}
-
-static DEFINE_SIMPLE_DEV_PM_OPS(tca6416_keypad_dev_pm_ops,
-                               tca6416_keypad_suspend, tca6416_keypad_resume);
-
 static struct i2c_driver tca6416_keypad_driver = {
        .driver = {
                .name   = "tca6416-keypad",
-               .pm     = pm_sleep_ptr(&tca6416_keypad_dev_pm_ops),
        },
        .probe          = tca6416_keypad_probe,
-       .remove         = tca6416_keypad_remove,
        .id_table       = tca6416_id,
 };
 
index d5a6c7d..c9a823e 100644 (file)
@@ -640,7 +640,7 @@ static int tegra_kbc_probe(struct platform_device *pdev)
 
        timer_setup(&kbc->timer, tegra_kbc_keypress_timer, 0);
 
-       kbc->mmio = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+       kbc->mmio = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(kbc->mmio))
                return PTR_ERR(kbc->mmio);
 
index 75bd3ea..0fd761a 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pm.h>
 #include <linux/regulator/consumer.h>
 
index 8a320e6..6ba984d 100644 (file)
@@ -791,10 +791,10 @@ config INPUT_IQS626A
          module will be called iqs626a.
 
 config INPUT_IQS7222
-       tristate "Azoteq IQS7222A/B/C capacitive touch controller"
+       tristate "Azoteq IQS7222A/B/C/D capacitive touch controller"
        depends on I2C
        help
-         Say Y to enable support for the Azoteq IQS7222A/B/C family
+         Say Y to enable support for the Azoteq IQS7222A/B/C/D family
          of capacitive touch controllers.
 
          To compile this driver as a module, choose M here: the
index 879790b..85cddb8 100644 (file)
@@ -1,16 +1,8 @@
-/**
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  * CPCAP Power Button Input Driver
  *
  * Copyright (C) 2017 Sebastian Reichel <sre@kernel.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file "COPYING" in the main directory of this
- * archive for more details.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
index b14a389..74808ba 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
 #include <linux/workqueue.h>
 #include <linux/regmap.h>
 #include <linux/of.h>
@@ -251,6 +252,14 @@ static int da9063_onkey_probe(struct platform_device *pdev)
                return error;
        }
 
+       error = dev_pm_set_wake_irq(&pdev->dev, irq);
+       if (error)
+               dev_warn(&pdev->dev,
+                        "Failed to set IRQ %d as a wake IRQ: %d\n",
+                        irq, error);
+       else
+               device_init_wakeup(&pdev->dev, true);
+
        error = input_register_device(onkey->input);
        if (error) {
                dev_err(&pdev->dev,
index 134a130..ad44b4d 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/input.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/regulator/consumer.h>
@@ -113,22 +113,14 @@ static int gpio_vibrator_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        vibrator->vcc = devm_regulator_get(&pdev->dev, "vcc");
-       err = PTR_ERR_OR_ZERO(vibrator->vcc);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request regulator: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->vcc))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->vcc),
+                                    "Failed to request regulator\n");
 
        vibrator->gpio = devm_gpiod_get(&pdev->dev, "enable", GPIOD_OUT_LOW);
-       err = PTR_ERR_OR_ZERO(vibrator->gpio);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request main gpio: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->gpio))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->gpio),
+                                    "Failed to request main gpio\n");
 
        INIT_WORK(&vibrator->play_work, gpio_vibrator_play_work);
 
index 1272ef7..c0a0856 100644 (file)
@@ -17,9 +17,9 @@
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index 50035c2..0dab54d 100644 (file)
@@ -19,8 +19,8 @@
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index 096b092..36aeeae 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * Azoteq IQS7222A/B/C Capacitive Touch Controller
+ * Azoteq IQS7222A/B/C/D Capacitive Touch Controller
  *
  * Copyright (C) 2022 Jeff LaBundy <jeff@labundy.com>
  */
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/input.h>
+#include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/ktime.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/property.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
@@ -25,6 +26,7 @@
 #define IQS7222_PROD_NUM_A                     840
 #define IQS7222_PROD_NUM_B                     698
 #define IQS7222_PROD_NUM_C                     863
+#define IQS7222_PROD_NUM_D                     1046
 
 #define IQS7222_SYS_STATUS                     0x10
 #define IQS7222_SYS_STATUS_RESET               BIT(3)
@@ -54,6 +56,7 @@
 
 #define IQS7222_EVENT_MASK_ATI                 BIT(12)
 #define IQS7222_EVENT_MASK_SLDR                        BIT(10)
+#define IQS7222_EVENT_MASK_TPAD                        IQS7222_EVENT_MASK_SLDR
 #define IQS7222_EVENT_MASK_TOUCH               BIT(1)
 #define IQS7222_EVENT_MASK_PROX                        BIT(0)
 
@@ -71,6 +74,7 @@
 #define IQS7222_MAX_COLS_CHAN                  6
 #define IQS7222_MAX_COLS_FILT                  2
 #define IQS7222_MAX_COLS_SLDR                  11
+#define IQS7222_MAX_COLS_TPAD                  24
 #define IQS7222_MAX_COLS_GPIO                  3
 #define IQS7222_MAX_COLS_SYS                   13
 
@@ -102,16 +106,18 @@ enum iqs7222_reg_grp_id {
        IQS7222_REG_GRP_BTN,
        IQS7222_REG_GRP_CHAN,
        IQS7222_REG_GRP_SLDR,
+       IQS7222_REG_GRP_TPAD,
        IQS7222_REG_GRP_GPIO,
        IQS7222_REG_GRP_SYS,
        IQS7222_NUM_REG_GRPS
 };
 
 static const char * const iqs7222_reg_grp_names[IQS7222_NUM_REG_GRPS] = {
-       [IQS7222_REG_GRP_CYCLE] = "cycle",
-       [IQS7222_REG_GRP_CHAN] = "channel",
-       [IQS7222_REG_GRP_SLDR] = "slider",
-       [IQS7222_REG_GRP_GPIO] = "gpio",
+       [IQS7222_REG_GRP_CYCLE] = "cycle-%d",
+       [IQS7222_REG_GRP_CHAN] = "channel-%d",
+       [IQS7222_REG_GRP_SLDR] = "slider-%d",
+       [IQS7222_REG_GRP_TPAD] = "trackpad",
+       [IQS7222_REG_GRP_GPIO] = "gpio-%d",
 };
 
 static const unsigned int iqs7222_max_cols[IQS7222_NUM_REG_GRPS] = {
@@ -122,6 +128,7 @@ static const unsigned int iqs7222_max_cols[IQS7222_NUM_REG_GRPS] = {
        [IQS7222_REG_GRP_CHAN] = IQS7222_MAX_COLS_CHAN,
        [IQS7222_REG_GRP_FILT] = IQS7222_MAX_COLS_FILT,
        [IQS7222_REG_GRP_SLDR] = IQS7222_MAX_COLS_SLDR,
+       [IQS7222_REG_GRP_TPAD] = IQS7222_MAX_COLS_TPAD,
        [IQS7222_REG_GRP_GPIO] = IQS7222_MAX_COLS_GPIO,
        [IQS7222_REG_GRP_SYS] = IQS7222_MAX_COLS_SYS,
 };
@@ -130,8 +137,10 @@ static const unsigned int iqs7222_gpio_links[] = { 2, 5, 6, };
 
 struct iqs7222_event_desc {
        const char *name;
+       u16 link;
        u16 mask;
        u16 val;
+       u16 strict;
        u16 enable;
        enum iqs7222_reg_key_id reg_key;
 };
@@ -188,6 +197,93 @@ static const struct iqs7222_event_desc iqs7222_sl_events[] = {
        },
 };
 
+static const struct iqs7222_event_desc iqs7222_tp_events[] = {
+       {
+               .name = "event-press",
+               .link = BIT(7),
+       },
+       {
+               .name = "event-tap",
+               .link = BIT(0),
+               .mask = BIT(0),
+               .val = BIT(0),
+               .enable = BIT(0),
+               .reg_key = IQS7222_REG_KEY_TAP,
+       },
+       {
+               .name = "event-swipe-x-pos",
+               .link = BIT(2),
+               .mask = BIT(2) | BIT(1),
+               .val = BIT(2),
+               .strict = BIT(4),
+               .enable = BIT(1),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-swipe-y-pos",
+               .link = BIT(3),
+               .mask = BIT(3) | BIT(1),
+               .val = BIT(3),
+               .strict = BIT(3),
+               .enable = BIT(1),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-swipe-x-neg",
+               .link = BIT(4),
+               .mask = BIT(4) | BIT(1),
+               .val = BIT(4),
+               .strict = BIT(4),
+               .enable = BIT(1),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-swipe-y-neg",
+               .link = BIT(5),
+               .mask = BIT(5) | BIT(1),
+               .val = BIT(5),
+               .strict = BIT(3),
+               .enable = BIT(1),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-flick-x-pos",
+               .link = BIT(2),
+               .mask = BIT(2) | BIT(1),
+               .val = BIT(2) | BIT(1),
+               .strict = BIT(4),
+               .enable = BIT(2),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-flick-y-pos",
+               .link = BIT(3),
+               .mask = BIT(3) | BIT(1),
+               .val = BIT(3) | BIT(1),
+               .strict = BIT(3),
+               .enable = BIT(2),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-flick-x-neg",
+               .link = BIT(4),
+               .mask = BIT(4) | BIT(1),
+               .val = BIT(4) | BIT(1),
+               .strict = BIT(4),
+               .enable = BIT(2),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-flick-y-neg",
+               .link = BIT(5),
+               .mask = BIT(5) | BIT(1),
+               .val = BIT(5) | BIT(1),
+               .strict = BIT(3),
+               .enable = BIT(2),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+};
+
 struct iqs7222_reg_grp_desc {
        u16 base;
        int num_row;
@@ -524,6 +620,62 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
                        },
                },
        },
+       {
+               .prod_num = IQS7222_PROD_NUM_D,
+               .fw_major = 0,
+               .fw_minor = 37,
+               .touch_link = 1770,
+               .allow_offset = 9,
+               .event_offset = 10,
+               .comms_offset = 11,
+               .reg_grps = {
+                       [IQS7222_REG_GRP_STAT] = {
+                               .base = IQS7222_SYS_STATUS,
+                               .num_row = 1,
+                               .num_col = 7,
+                       },
+                       [IQS7222_REG_GRP_CYCLE] = {
+                               .base = 0x8000,
+                               .num_row = 7,
+                               .num_col = 2,
+                       },
+                       [IQS7222_REG_GRP_GLBL] = {
+                               .base = 0x8700,
+                               .num_row = 1,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_BTN] = {
+                               .base = 0x9000,
+                               .num_row = 14,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_CHAN] = {
+                               .base = 0xA000,
+                               .num_row = 14,
+                               .num_col = 4,
+                       },
+                       [IQS7222_REG_GRP_FILT] = {
+                               .base = 0xAE00,
+                               .num_row = 1,
+                               .num_col = 2,
+                       },
+                       [IQS7222_REG_GRP_TPAD] = {
+                               .base = 0xB000,
+                               .num_row = 1,
+                               .num_col = 24,
+                       },
+                       [IQS7222_REG_GRP_GPIO] = {
+                               .base = 0xC000,
+                               .num_row = 3,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_SYS] = {
+                               .base = IQS7222_SYS_SETUP,
+                               .num_row = 1,
+                               .num_col = 12,
+                       },
+               },
+       },
 };
 
 struct iqs7222_prop_desc {
@@ -1009,6 +1161,123 @@ static const struct iqs7222_prop_desc iqs7222_props[] = {
                .label = "maximum gesture time",
        },
        {
+               .name = "azoteq,num-rows",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 0,
+               .reg_shift = 4,
+               .reg_width = 4,
+               .val_min = 1,
+               .val_max = 12,
+               .label = "number of rows",
+       },
+       {
+               .name = "azoteq,num-cols",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 0,
+               .reg_shift = 0,
+               .reg_width = 4,
+               .val_min = 1,
+               .val_max = 12,
+               .label = "number of columns",
+       },
+       {
+               .name = "azoteq,lower-cal-y",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 1,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "lower vertical calibration",
+       },
+       {
+               .name = "azoteq,lower-cal-x",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 1,
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "lower horizontal calibration",
+       },
+       {
+               .name = "azoteq,upper-cal-y",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 2,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "upper vertical calibration",
+       },
+       {
+               .name = "azoteq,upper-cal-x",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 2,
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "upper horizontal calibration",
+       },
+       {
+               .name = "azoteq,top-speed",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 3,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 4,
+               .label = "top speed",
+       },
+       {
+               .name = "azoteq,bottom-speed",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 3,
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "bottom speed",
+       },
+       {
+               .name = "azoteq,gesture-min-ms",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_TAP,
+               .reg_offset = 20,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 16,
+               .label = "minimum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-max-ms",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+               .reg_offset = 21,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 16,
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-max-ms",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_TAP,
+               .reg_offset = 21,
+               .reg_shift = 0,
+               .reg_width = 8,
+               .val_pitch = 16,
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_TAP,
+               .reg_offset = 22,
+               .reg_shift = 0,
+               .reg_width = 16,
+               .label = "gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+               .reg_offset = 23,
+               .reg_shift = 0,
+               .reg_width = 16,
+               .label = "gesture distance",
+       },
+       {
                .name = "drive-open-drain",
                .reg_grp = IQS7222_REG_GRP_GPIO,
                .reg_offset = 0,
@@ -1091,16 +1360,19 @@ struct iqs7222_private {
        struct gpio_desc *irq_gpio;
        struct i2c_client *client;
        struct input_dev *keypad;
+       struct touchscreen_properties prop;
        unsigned int kp_type[IQS7222_MAX_CHAN][ARRAY_SIZE(iqs7222_kp_events)];
        unsigned int kp_code[IQS7222_MAX_CHAN][ARRAY_SIZE(iqs7222_kp_events)];
        unsigned int sl_code[IQS7222_MAX_SLDR][ARRAY_SIZE(iqs7222_sl_events)];
        unsigned int sl_axis[IQS7222_MAX_SLDR];
+       unsigned int tp_code[ARRAY_SIZE(iqs7222_tp_events)];
        u16 cycle_setup[IQS7222_MAX_CHAN / 2][IQS7222_MAX_COLS_CYCLE];
        u16 glbl_setup[IQS7222_MAX_COLS_GLBL];
        u16 btn_setup[IQS7222_MAX_CHAN][IQS7222_MAX_COLS_BTN];
        u16 chan_setup[IQS7222_MAX_CHAN][IQS7222_MAX_COLS_CHAN];
        u16 filt_setup[IQS7222_MAX_COLS_FILT];
        u16 sldr_setup[IQS7222_MAX_SLDR][IQS7222_MAX_COLS_SLDR];
+       u16 tpad_setup[IQS7222_MAX_COLS_TPAD];
        u16 gpio_setup[ARRAY_SIZE(iqs7222_gpio_links)][IQS7222_MAX_COLS_GPIO];
        u16 sys_setup[IQS7222_MAX_COLS_SYS];
 };
@@ -1127,6 +1399,9 @@ static u16 *iqs7222_setup(struct iqs7222_private *iqs7222,
        case IQS7222_REG_GRP_SLDR:
                return iqs7222->sldr_setup[row];
 
+       case IQS7222_REG_GRP_TPAD:
+               return iqs7222->tpad_setup;
+
        case IQS7222_REG_GRP_GPIO:
                return iqs7222->gpio_setup[row];
 
@@ -1381,9 +1656,6 @@ static int iqs7222_ati_trigger(struct iqs7222_private *iqs7222)
        if (error)
                return error;
 
-       sys_setup &= ~IQS7222_SYS_SETUP_INTF_MODE_MASK;
-       sys_setup &= ~IQS7222_SYS_SETUP_PWR_MODE_MASK;
-
        for (i = 0; i < IQS7222_NUM_RETRIES; i++) {
                /*
                 * Trigger ATI from streaming and normal-power modes so that
@@ -1561,8 +1833,11 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir)
                        return error;
        }
 
-       if (dir == READ)
+       if (dir == READ) {
+               iqs7222->sys_setup[0] &= ~IQS7222_SYS_SETUP_INTF_MODE_MASK;
+               iqs7222->sys_setup[0] &= ~IQS7222_SYS_SETUP_PWR_MODE_MASK;
                return 0;
+       }
 
        return iqs7222_ati_trigger(iqs7222);
 }
@@ -1936,6 +2211,14 @@ static int iqs7222_parse_chan(struct iqs7222_private *iqs7222,
                ref_setup[4] = dev_desc->touch_link;
                if (fwnode_property_present(chan_node, "azoteq,use-prox"))
                        ref_setup[4] -= 2;
+       } else if (dev_desc->reg_grps[IQS7222_REG_GRP_TPAD].num_row &&
+                  fwnode_property_present(chan_node,
+                                          "azoteq,counts-filt-enable")) {
+               /*
+                * In the case of IQS7222D, however, the reference mode field
+                * is partially repurposed as a counts filter enable control.
+                */
+               chan_setup[0] |= IQS7222_CHAN_SETUP_0_REF_MODE_REF;
        }
 
        if (fwnode_property_present(chan_node, "azoteq,rx-enable")) {
@@ -2278,6 +2561,136 @@ static int iqs7222_parse_sldr(struct iqs7222_private *iqs7222,
                                   IQS7222_REG_KEY_NO_WHEEL);
 }
 
+static int iqs7222_parse_tpad(struct iqs7222_private *iqs7222,
+                             struct fwnode_handle *tpad_node, int tpad_index)
+{
+       const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc;
+       struct touchscreen_properties *prop = &iqs7222->prop;
+       struct i2c_client *client = iqs7222->client;
+       int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row;
+       int count, error, i;
+       u16 *event_mask = &iqs7222->sys_setup[dev_desc->event_offset];
+       u16 *tpad_setup = iqs7222->tpad_setup;
+       unsigned int chan_sel[12];
+
+       error = iqs7222_parse_props(iqs7222, tpad_node, tpad_index,
+                                   IQS7222_REG_GRP_TPAD,
+                                   IQS7222_REG_KEY_NONE);
+       if (error)
+               return error;
+
+       count = fwnode_property_count_u32(tpad_node, "azoteq,channel-select");
+       if (count < 0) {
+               dev_err(&client->dev, "Failed to count %s channels: %d\n",
+                       fwnode_get_name(tpad_node), count);
+               return count;
+       } else if (!count || count > ARRAY_SIZE(chan_sel)) {
+               dev_err(&client->dev, "Invalid number of %s channels\n",
+                       fwnode_get_name(tpad_node));
+               return -EINVAL;
+       }
+
+       error = fwnode_property_read_u32_array(tpad_node,
+                                              "azoteq,channel-select",
+                                              chan_sel, count);
+       if (error) {
+               dev_err(&client->dev, "Failed to read %s channels: %d\n",
+                       fwnode_get_name(tpad_node), error);
+               return error;
+       }
+
+       tpad_setup[6] &= ~GENMASK(num_chan - 1, 0);
+
+       for (i = 0; i < ARRAY_SIZE(chan_sel); i++) {
+               tpad_setup[8 + i] = 0;
+               if (i >= count || chan_sel[i] == U8_MAX)
+                       continue;
+
+               if (chan_sel[i] >= num_chan) {
+                       dev_err(&client->dev, "Invalid %s channel: %u\n",
+                               fwnode_get_name(tpad_node), chan_sel[i]);
+                       return -EINVAL;
+               }
+
+               /*
+                * The following fields indicate which channels participate in
+                * the trackpad, as well as each channel's relative placement.
+                */
+               tpad_setup[6] |= BIT(chan_sel[i]);
+               tpad_setup[8 + i] = chan_sel[i] * 34 + 1072;
+       }
+
+       tpad_setup[7] = dev_desc->touch_link;
+       if (fwnode_property_present(tpad_node, "azoteq,use-prox"))
+               tpad_setup[7] -= 2;
+
+       for (i = 0; i < ARRAY_SIZE(iqs7222_tp_events); i++)
+               tpad_setup[20] &= ~(iqs7222_tp_events[i].strict |
+                                   iqs7222_tp_events[i].enable);
+
+       for (i = 0; i < ARRAY_SIZE(iqs7222_tp_events); i++) {
+               const char *event_name = iqs7222_tp_events[i].name;
+               struct fwnode_handle *event_node;
+
+               event_node = fwnode_get_named_child_node(tpad_node, event_name);
+               if (!event_node)
+                       continue;
+
+               if (fwnode_property_present(event_node,
+                                           "azoteq,gesture-angle-tighten"))
+                       tpad_setup[20] |= iqs7222_tp_events[i].strict;
+
+               tpad_setup[20] |= iqs7222_tp_events[i].enable;
+
+               error = iqs7222_parse_event(iqs7222, event_node, tpad_index,
+                                           IQS7222_REG_GRP_TPAD,
+                                           iqs7222_tp_events[i].reg_key,
+                                           iqs7222_tp_events[i].link, 1566,
+                                           NULL,
+                                           &iqs7222->tp_code[i]);
+               fwnode_handle_put(event_node);
+               if (error)
+                       return error;
+
+               if (!dev_desc->event_offset)
+                       continue;
+
+               /*
+                * The press/release event is determined based on whether the
+                * coordinate fields report 0xFFFF and solely relies on touch
+                * or proximity interrupts to be unmasked.
+                */
+               if (i)
+                       *event_mask |= IQS7222_EVENT_MASK_TPAD;
+               else if (tpad_setup[7] == dev_desc->touch_link)
+                       *event_mask |= IQS7222_EVENT_MASK_TOUCH;
+               else
+                       *event_mask |= IQS7222_EVENT_MASK_PROX;
+       }
+
+       if (!iqs7222->tp_code[0])
+               return 0;
+
+       input_set_abs_params(iqs7222->keypad, ABS_X,
+                            0, (tpad_setup[4] ? : 1) - 1, 0, 0);
+
+       input_set_abs_params(iqs7222->keypad, ABS_Y,
+                            0, (tpad_setup[5] ? : 1) - 1, 0, 0);
+
+       touchscreen_parse_properties(iqs7222->keypad, false, prop);
+
+       if (prop->max_x >= U16_MAX || prop->max_y >= U16_MAX) {
+               dev_err(&client->dev, "Invalid trackpad size: %u*%u\n",
+                       prop->max_x, prop->max_y);
+               return -EINVAL;
+       }
+
+       tpad_setup[4] = prop->max_x + 1;
+       tpad_setup[5] = prop->max_y + 1;
+
+       return 0;
+}
+
 static int (*iqs7222_parse_extra[IQS7222_NUM_REG_GRPS])
                                (struct iqs7222_private *iqs7222,
                                 struct fwnode_handle *reg_grp_node,
@@ -2285,6 +2698,7 @@ static int (*iqs7222_parse_extra[IQS7222_NUM_REG_GRPS])
        [IQS7222_REG_GRP_CYCLE] = iqs7222_parse_cycle,
        [IQS7222_REG_GRP_CHAN] = iqs7222_parse_chan,
        [IQS7222_REG_GRP_SLDR] = iqs7222_parse_sldr,
+       [IQS7222_REG_GRP_TPAD] = iqs7222_parse_tpad,
 };
 
 static int iqs7222_parse_reg_grp(struct iqs7222_private *iqs7222,
@@ -2298,7 +2712,7 @@ static int iqs7222_parse_reg_grp(struct iqs7222_private *iqs7222,
        if (iqs7222_reg_grp_names[reg_grp]) {
                char reg_grp_name[16];
 
-               snprintf(reg_grp_name, sizeof(reg_grp_name), "%s-%d",
+               snprintf(reg_grp_name, sizeof(reg_grp_name),
                         iqs7222_reg_grp_names[reg_grp], reg_grp_index);
 
                reg_grp_node = device_get_named_child_node(&client->dev,
@@ -2346,8 +2760,8 @@ static int iqs7222_parse_all(struct iqs7222_private *iqs7222)
                        continue;
 
                /*
-                * The IQS7222C exposes multiple GPIO and must be informed
-                * as to which GPIO this group represents.
+                * The IQS7222C and IQS7222D expose multiple GPIO and must be
+                * informed as to which GPIO this group represents.
                 */
                for (j = 0; j < ARRAY_SIZE(iqs7222_gpio_links); j++)
                        gpio_setup[0] &= ~BIT(iqs7222_gpio_links[j]);
@@ -2480,6 +2894,41 @@ static int iqs7222_report(struct iqs7222_private *iqs7222)
                                         iqs7222->sl_code[i][j], 0);
        }
 
+       for (i = 0; i < dev_desc->reg_grps[IQS7222_REG_GRP_TPAD].num_row; i++) {
+               u16 tpad_pos_x = le16_to_cpu(status[4]);
+               u16 tpad_pos_y = le16_to_cpu(status[5]);
+               u16 state = le16_to_cpu(status[6]);
+
+               input_report_key(iqs7222->keypad, iqs7222->tp_code[0],
+                                tpad_pos_x < U16_MAX);
+
+               if (tpad_pos_x < U16_MAX)
+                       touchscreen_report_pos(iqs7222->keypad, &iqs7222->prop,
+                                              tpad_pos_x, tpad_pos_y, false);
+
+               if (!(le16_to_cpu(status[1]) & IQS7222_EVENT_MASK_TPAD))
+                       continue;
+
+               /*
+                * Skip the press/release event, as it does not have separate
+                * status fields and is handled separately.
+                */
+               for (j = 1; j < ARRAY_SIZE(iqs7222_tp_events); j++) {
+                       u16 mask = iqs7222_tp_events[j].mask;
+                       u16 val = iqs7222_tp_events[j].val;
+
+                       input_report_key(iqs7222->keypad,
+                                        iqs7222->tp_code[j],
+                                        (state & mask) == val);
+               }
+
+               input_sync(iqs7222->keypad);
+
+               for (j = 1; j < ARRAY_SIZE(iqs7222_tp_events); j++)
+                       input_report_key(iqs7222->keypad,
+                                        iqs7222->tp_code[j], 0);
+       }
+
        input_sync(iqs7222->keypad);
 
        return 0;
@@ -2584,6 +3033,7 @@ static const struct of_device_id iqs7222_of_match[] = {
        { .compatible = "azoteq,iqs7222a" },
        { .compatible = "azoteq,iqs7222b" },
        { .compatible = "azoteq,iqs7222c" },
+       { .compatible = "azoteq,iqs7222d" },
        { }
 };
 MODULE_DEVICE_TABLE(of, iqs7222_of_match);
@@ -2598,5 +3048,5 @@ static struct i2c_driver iqs7222_i2c_driver = {
 module_i2c_driver(iqs7222_i2c_driver);
 
 MODULE_AUTHOR("Jeff LaBundy <jeff@labundy.com>");
-MODULE_DESCRIPTION("Azoteq IQS7222A/B/C Capacitive Touch Controller");
+MODULE_DESCRIPTION("Azoteq IQS7222A/B/C/D Capacitive Touch Controller");
 MODULE_LICENSE("GPL");
index 76a190b..662b436 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/input.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 
 #define MMA8450_DRV_NAME       "mma8450"
 
index 74d77d8..ba747c5 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/regmap.h>
index 04cb87e..5c288fe 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index 89fb137..c406a1c 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/regmap.h>
 #include <linux/log2.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #define PON_CNTL_1 0x1C
 #define PON_CNTL_PULL_UP BIT(7)
index 3cf1812..1e731d8 100644 (file)
@@ -132,13 +132,8 @@ static int pwm_beeper_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        beeper->pwm = devm_pwm_get(dev, NULL);
-       if (IS_ERR(beeper->pwm)) {
-               error = PTR_ERR(beeper->pwm);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to request PWM device: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(beeper->pwm))
+               return dev_err_probe(dev, PTR_ERR(beeper->pwm), "Failed to request PWM device\n");
 
        /* Sync up PWM state and ensure it is off. */
        pwm_init_state(beeper->pwm, &state);
@@ -151,13 +146,9 @@ static int pwm_beeper_probe(struct platform_device *pdev)
        }
 
        beeper->amplifier = devm_regulator_get(dev, "amp");
-       if (IS_ERR(beeper->amplifier)) {
-               error = PTR_ERR(beeper->amplifier);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get 'amp' regulator: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(beeper->amplifier))
+               return dev_err_probe(dev, PTR_ERR(beeper->amplifier),
+                                    "Failed to get 'amp' regulator\n");
 
        INIT_WORK(&beeper->work, pwm_beeper_work);
 
index 2ba0352..acac79c 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/input.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/pwm.h>
@@ -140,32 +140,20 @@ static int pwm_vibrator_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        vibrator->vcc = devm_regulator_get(&pdev->dev, "vcc");
-       err = PTR_ERR_OR_ZERO(vibrator->vcc);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request regulator: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->vcc))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->vcc),
+                                    "Failed to request regulator\n");
 
        vibrator->enable_gpio = devm_gpiod_get_optional(&pdev->dev, "enable",
                                                        GPIOD_OUT_LOW);
-       err = PTR_ERR_OR_ZERO(vibrator->enable_gpio);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request enable gpio: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->enable_gpio))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->enable_gpio),
+                                    "Failed to request enable gpio\n");
 
        vibrator->pwm = devm_pwm_get(&pdev->dev, "enable");
-       err = PTR_ERR_OR_ZERO(vibrator->pwm);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request main pwm: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->pwm))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->pwm),
+                                    "Failed to request main pwm\n");
 
        INIT_WORK(&vibrator->play_work, pwm_vibrator_play_work);
 
index 22ec620..e94cab8 100644 (file)
@@ -236,12 +236,8 @@ static int rotary_encoder_probe(struct platform_device *pdev)
                device_property_read_bool(dev, "rotary-encoder,relative-axis");
 
        encoder->gpios = devm_gpiod_get_array(dev, NULL, GPIOD_IN);
-       if (IS_ERR(encoder->gpios)) {
-               err = PTR_ERR(encoder->gpios);
-               if (err != -EPROBE_DEFER)
-                       dev_err(dev, "unable to get gpios: %d\n", err);
-               return err;
-       }
+       if (IS_ERR(encoder->gpios))
+               return dev_err_probe(dev, PTR_ERR(encoder->gpios), "unable to get gpios\n");
        if (encoder->gpios->ndescs < 2) {
                dev_err(dev, "not enough gpios found\n");
                return -EINVAL;
@@ -255,7 +251,6 @@ static int rotary_encoder_probe(struct platform_device *pdev)
 
        input->name = pdev->name;
        input->id.bustype = BUS_HOST;
-       input->dev.parent = dev;
 
        if (encoder->relative_axis)
                input_set_capability(input, EV_REL, encoder->axis);
index cdcb773..e5dd847 100644 (file)
@@ -9,7 +9,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/input.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #include <asm/io.h>
index 0cff742..148a601 100644 (file)
@@ -1221,13 +1221,8 @@ static int elan_probe(struct i2c_client *client)
        mutex_init(&data->sysfs_mutex);
 
        data->vcc = devm_regulator_get(dev, "vcc");
-       if (IS_ERR(data->vcc)) {
-               error = PTR_ERR(data->vcc);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get 'vcc' regulator: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(data->vcc))
+               return dev_err_probe(dev, PTR_ERR(data->vcc), "Failed to get 'vcc' regulator\n");
 
        error = regulator_enable(data->vcc);
        if (error) {
index 2a2459b..7b13de9 100644 (file)
@@ -5,6 +5,7 @@
 
 #define pr_fmt(fmt)            KBUILD_MODNAME ": " fmt
 
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/libps2.h>
@@ -118,13 +119,18 @@ static psmouse_ret_t psmouse_smbus_process_byte(struct psmouse *psmouse)
        return PSMOUSE_FULL_PACKET;
 }
 
-static int psmouse_smbus_reconnect(struct psmouse *psmouse)
+static void psmouse_activate_smbus_mode(struct psmouse_smbus_dev *smbdev)
 {
-       struct psmouse_smbus_dev *smbdev = psmouse->private;
-
-       if (smbdev->need_deactivate)
-               psmouse_deactivate(psmouse);
+       if (smbdev->need_deactivate) {
+               psmouse_deactivate(smbdev->psmouse);
+               /* Give the device time to switch into SMBus mode */
+               msleep(30);
+       }
+}
 
+static int psmouse_smbus_reconnect(struct psmouse *psmouse)
+{
+       psmouse_activate_smbus_mode(psmouse->private);
        return 0;
 }
 
@@ -257,8 +263,7 @@ int psmouse_smbus_init(struct psmouse *psmouse,
                }
        }
 
-       if (need_deactivate)
-               psmouse_deactivate(psmouse);
+       psmouse_activate_smbus_mode(smbdev);
 
        psmouse->private = smbdev;
        psmouse->protocol_handler = psmouse_smbus_process_byte;
index 513d96e..3f6866d 100644 (file)
  * Contributors: Daniel Hellstrom <daniel@gaisler.com>
  */
 #include <linux/platform_device.h>
-#include <linux/of_device.h>
 #include <linux/module.h>
 #include <linux/serio.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/device.h>
 #include <linux/delay.h>
index 028e45b..1724d6c 100644 (file)
@@ -1281,6 +1281,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
                .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
                                        SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
        },
+       /* See comment on TUXEDO InfinityBook S17 Gen6 / Clevo NS70MU above */
+       {
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "PD5x_7xPNP_PNR_PNN_PNT"),
+               },
+               .driver_data = (void *)(SERIO_QUIRK_NOAUX)
+       },
        {
                .matches = {
                        DMI_MATCH(DMI_BOARD_NAME, "X170SM"),
index c712c1f..b68793b 100644 (file)
@@ -2,7 +2,9 @@
 #ifndef _I8042_SPARCIO_H
 #define _I8042_SPARCIO_H
 
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 
 #include <asm/io.h>
index ce420eb..e8a9709 100644 (file)
@@ -101,12 +101,12 @@ static int rpckbd_probe(struct platform_device *dev)
        int tx_irq, rx_irq;
 
        rx_irq = platform_get_irq(dev, 0);
-       if (rx_irq <= 0)
-               return rx_irq < 0 ? rx_irq : -ENXIO;
+       if (rx_irq < 0)
+               return rx_irq;
 
        tx_irq = platform_get_irq(dev, 1);
-       if (tx_irq <= 0)
-               return tx_irq < 0 ? tx_irq : -ENXIO;
+       if (tx_irq < 0)
+               return tx_irq;
 
        serio = kzalloc(sizeof(struct serio), GFP_KERNEL);
        rpckbd = kzalloc(sizeof(*rpckbd), GFP_KERNEL);
index 960d760..f3d28da 100644 (file)
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #define DRIVER_NAME            "xilinx_ps2"
 
index c2cbd33..e3e2324 100644 (file)
@@ -655,10 +655,10 @@ config TOUCHSCREEN_MTOUCH
          module will be called mtouch.
 
 config TOUCHSCREEN_NOVATEK_NVT_TS
-       tristate "Novatek NVT-ts touchscreen support"
+       tristate "Novatek NT11205 touchscreen support"
        depends on I2C
        help
-         Say Y here if you have a Novatek NVT-ts touchscreen.
+         Say Y here if you have a Novatek NT11205 touchscreen.
          If unsure, say N.
 
          To compile this driver as a module, choose M here: the
@@ -1365,6 +1365,16 @@ config TOUCHSCREEN_IQS5XX
          To compile this driver as a module, choose M here: the
          module will be called iqs5xx.
 
+config TOUCHSCREEN_IQS7211
+       tristate "Azoteq IQS7210A/7211A/E trackpad/touchscreen controller"
+       depends on I2C
+       help
+         Say Y to enable support for the Azoteq IQS7210A/7211A/E
+         family of trackpad/touchscreen controllers.
+
+         To compile this driver as a module, choose M here: the
+         module will be called iqs7211.
+
 config TOUCHSCREEN_ZINITIX
        tristate "Zinitix touchscreen support"
        depends on I2C
index 159cd51..62bd24f 100644 (file)
@@ -115,5 +115,6 @@ obj-$(CONFIG_TOUCHSCREEN_COLIBRI_VF50)      += colibri-vf50-ts.o
 obj-$(CONFIG_TOUCHSCREEN_ROHM_BU21023) += rohm_bu21023.o
 obj-$(CONFIG_TOUCHSCREEN_RASPBERRYPI_FW)       += raspberrypi-ts.o
 obj-$(CONFIG_TOUCHSCREEN_IQS5XX)       += iqs5xx.o
+obj-$(CONFIG_TOUCHSCREEN_IQS7211)      += iqs7211.o
 obj-$(CONFIG_TOUCHSCREEN_ZINITIX)      += zinitix.o
 obj-$(CONFIG_TOUCHSCREEN_HIMAX_HX83112B)       += himax_hx83112b.o
index 85332cf..652439a 100644 (file)
@@ -410,31 +410,32 @@ static int bu21013_probe(struct i2c_client *client)
        struct input_dev *in_dev;
        struct input_absinfo *info;
        u32 max_x = 0, max_y = 0;
+       struct device *dev = &client->dev;
        int error;
 
        if (!i2c_check_functionality(client->adapter,
                                     I2C_FUNC_SMBUS_BYTE_DATA)) {
-               dev_err(&client->dev, "i2c smbus byte data not supported\n");
+               dev_err(dev, "i2c smbus byte data not supported\n");
                return -EIO;
        }
 
        if (!client->irq) {
-               dev_err(&client->dev, "No IRQ set up\n");
+               dev_err(dev, "No IRQ set up\n");
                return -EINVAL;
        }
 
-       ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL);
+       ts = devm_kzalloc(dev, sizeof(*ts), GFP_KERNEL);
        if (!ts)
                return -ENOMEM;
 
        ts->client = client;
 
-       ts->x_flip = device_property_read_bool(&client->dev, "rohm,flip-x");
-       ts->y_flip = device_property_read_bool(&client->dev, "rohm,flip-y");
+       ts->x_flip = device_property_read_bool(dev, "rohm,flip-x");
+       ts->y_flip = device_property_read_bool(dev, "rohm,flip-y");
 
-       in_dev = devm_input_allocate_device(&client->dev);
+       in_dev = devm_input_allocate_device(dev);
        if (!in_dev) {
-               dev_err(&client->dev, "device memory alloc failed\n");
+               dev_err(dev, "device memory alloc failed\n");
                return -ENOMEM;
        }
        ts->in_dev = in_dev;
@@ -444,8 +445,8 @@ static int bu21013_probe(struct i2c_client *client)
        in_dev->name = DRIVER_TP;
        in_dev->id.bustype = BUS_I2C;
 
-       device_property_read_u32(&client->dev, "rohm,touch-max-x", &max_x);
-       device_property_read_u32(&client->dev, "rohm,touch-max-y", &max_y);
+       device_property_read_u32(dev, "rohm,touch-max-x", &max_x);
+       device_property_read_u32(dev, "rohm,touch-max-y", &max_y);
 
        input_set_abs_params(in_dev, ABS_MT_POSITION_X, 0, max_x, 0, 0);
        input_set_abs_params(in_dev, ABS_MT_POSITION_Y, 0, max_y, 0, 0);
@@ -454,14 +455,14 @@ static int bu21013_probe(struct i2c_client *client)
 
        /* Adjust for the legacy "flip" properties, if present */
        if (!ts->props.invert_x &&
-           device_property_read_bool(&client->dev, "rohm,flip-x")) {
+           device_property_read_bool(dev, "rohm,flip-x")) {
                info = &in_dev->absinfo[ABS_MT_POSITION_X];
                info->maximum -= info->minimum;
                info->minimum = 0;
        }
 
        if (!ts->props.invert_y &&
-           device_property_read_bool(&client->dev, "rohm,flip-y")) {
+           device_property_read_bool(dev, "rohm,flip-y")) {
                info = &in_dev->absinfo[ABS_MT_POSITION_Y];
                info->maximum -= info->minimum;
                info->minimum = 0;
@@ -471,55 +472,46 @@ static int bu21013_probe(struct i2c_client *client)
                                    INPUT_MT_DIRECT | INPUT_MT_TRACK |
                                        INPUT_MT_DROP_UNUSED);
        if (error) {
-               dev_err(&client->dev, "failed to initialize MT slots");
+               dev_err(dev, "failed to initialize MT slots");
                return error;
        }
 
-       ts->regulator = devm_regulator_get(&client->dev, "avdd");
+       ts->regulator = devm_regulator_get(dev, "avdd");
        if (IS_ERR(ts->regulator)) {
-               dev_err(&client->dev, "regulator_get failed\n");
+               dev_err(dev, "regulator_get failed\n");
                return PTR_ERR(ts->regulator);
        }
 
        error = regulator_enable(ts->regulator);
        if (error) {
-               dev_err(&client->dev, "regulator enable failed\n");
+               dev_err(dev, "regulator enable failed\n");
                return error;
        }
 
-       error = devm_add_action_or_reset(&client->dev, bu21013_power_off, ts);
+       error = devm_add_action_or_reset(dev, bu21013_power_off, ts);
        if (error) {
-               dev_err(&client->dev, "failed to install power off handler\n");
+               dev_err(dev, "failed to install power off handler\n");
                return error;
        }
 
        /* Named "CS" on the chip, DT binding is "reset" */
-       ts->cs_gpiod = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_HIGH);
-       error = PTR_ERR_OR_ZERO(ts->cs_gpiod);
-       if (error) {
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev, "failed to get CS GPIO\n");
-               return error;
-       }
+       ts->cs_gpiod = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(ts->cs_gpiod))
+               return dev_err_probe(dev, PTR_ERR(ts->cs_gpiod), "failed to get CS GPIO\n");
+
        gpiod_set_consumer_name(ts->cs_gpiod, "BU21013 CS");
 
-       error = devm_add_action_or_reset(&client->dev,
-                                        bu21013_disable_chip, ts);
+       error = devm_add_action_or_reset(dev, bu21013_disable_chip, ts);
        if (error) {
-               dev_err(&client->dev,
-                       "failed to install chip disable handler\n");
+               dev_err(dev, "failed to install chip disable handler\n");
                return error;
        }
 
        /* Named "INT" on the chip, DT binding is "touch" */
-       ts->int_gpiod = devm_gpiod_get_optional(&client->dev,
-                                               "touch", GPIOD_IN);
+       ts->int_gpiod = devm_gpiod_get_optional(dev, "touch", GPIOD_IN);
        error = PTR_ERR_OR_ZERO(ts->int_gpiod);
-       if (error) {
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev, "failed to get INT GPIO\n");
-               return error;
-       }
+       if (error)
+               return dev_err_probe(dev, error, "failed to get INT GPIO\n");
 
        if (ts->int_gpiod)
                gpiod_set_consumer_name(ts->int_gpiod, "BU21013 INT");
@@ -527,22 +519,20 @@ static int bu21013_probe(struct i2c_client *client)
        /* configure the touch panel controller */
        error = bu21013_init_chip(ts);
        if (error) {
-               dev_err(&client->dev, "error in bu21013 config\n");
+               dev_err(dev, "error in bu21013 config\n");
                return error;
        }
 
-       error = devm_request_threaded_irq(&client->dev, client->irq,
-                                         NULL, bu21013_gpio_irq,
+       error = devm_request_threaded_irq(dev, client->irq, NULL, bu21013_gpio_irq,
                                          IRQF_ONESHOT, DRIVER_TP, ts);
        if (error) {
-               dev_err(&client->dev, "request irq %d failed\n",
-                       client->irq);
+               dev_err(dev, "request irq %d failed\n", client->irq);
                return error;
        }
 
        error = input_register_device(in_dev);
        if (error) {
-               dev_err(&client->dev, "failed to register input device\n");
+               dev_err(dev, "failed to register input device\n");
                return error;
        }
 
index c8126d2..e1dfbd9 100644 (file)
@@ -333,6 +333,7 @@ static void bu21029_stop_chip(struct input_dev *dev)
 
 static int bu21029_probe(struct i2c_client *client)
 {
+       struct device *dev = &client->dev;
        struct bu21029_ts_data *bu21029;
        struct input_dev *in_dev;
        int error;
@@ -341,45 +342,33 @@ static int bu21029_probe(struct i2c_client *client)
                                     I2C_FUNC_SMBUS_WRITE_BYTE |
                                     I2C_FUNC_SMBUS_WRITE_BYTE_DATA |
                                     I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
-               dev_err(&client->dev,
-                       "i2c functionality support is not sufficient\n");
+               dev_err(dev, "i2c functionality support is not sufficient\n");
                return -EIO;
        }
 
-       bu21029 = devm_kzalloc(&client->dev, sizeof(*bu21029), GFP_KERNEL);
+       bu21029 = devm_kzalloc(dev, sizeof(*bu21029), GFP_KERNEL);
        if (!bu21029)
                return -ENOMEM;
 
-       error = device_property_read_u32(&client->dev, "rohm,x-plate-ohms",
-                                        &bu21029->x_plate_ohms);
+       error = device_property_read_u32(dev, "rohm,x-plate-ohms", &bu21029->x_plate_ohms);
        if (error) {
-               dev_err(&client->dev,
-                       "invalid 'x-plate-ohms' supplied: %d\n", error);
+               dev_err(dev, "invalid 'x-plate-ohms' supplied: %d\n", error);
                return error;
        }
 
-       bu21029->vdd = devm_regulator_get(&client->dev, "vdd");
-       if (IS_ERR(bu21029->vdd)) {
-               error = PTR_ERR(bu21029->vdd);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "failed to acquire 'vdd' supply: %d\n", error);
-               return error;
-       }
+       bu21029->vdd = devm_regulator_get(dev, "vdd");
+       if (IS_ERR(bu21029->vdd))
+               return dev_err_probe(dev, PTR_ERR(bu21029->vdd),
+                                    "failed to acquire 'vdd' supply\n");
 
-       bu21029->reset_gpios = devm_gpiod_get_optional(&client->dev,
-                                                      "reset", GPIOD_OUT_HIGH);
-       if (IS_ERR(bu21029->reset_gpios)) {
-               error = PTR_ERR(bu21029->reset_gpios);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "failed to acquire 'reset' gpio: %d\n", error);
-               return error;
-       }
+       bu21029->reset_gpios = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(bu21029->reset_gpios))
+               return dev_err_probe(dev, PTR_ERR(bu21029->reset_gpios),
+                                    "failed to acquire 'reset' gpio\n");
 
-       in_dev = devm_input_allocate_device(&client->dev);
+       in_dev = devm_input_allocate_device(dev);
        if (!in_dev) {
-               dev_err(&client->dev, "unable to allocate input device\n");
+               dev_err(dev, "unable to allocate input device\n");
                return -ENOMEM;
        }
 
@@ -400,20 +389,18 @@ static int bu21029_probe(struct i2c_client *client)
 
        input_set_drvdata(in_dev, bu21029);
 
-       error = devm_request_threaded_irq(&client->dev, client->irq,
-                                         NULL, bu21029_touch_soft_irq,
+       error = devm_request_threaded_irq(dev, client->irq, NULL,
+                                         bu21029_touch_soft_irq,
                                          IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                          DRIVER_NAME, bu21029);
        if (error) {
-               dev_err(&client->dev,
-                       "unable to request touch irq: %d\n", error);
+               dev_err(dev, "unable to request touch irq: %d\n", error);
                return error;
        }
 
        error = input_register_device(in_dev);
        if (error) {
-               dev_err(&client->dev,
-                       "unable to register input device: %d\n", error);
+               dev_err(dev, "unable to register input device: %d\n", error);
                return error;
        }
 
index 9fbeaf1..d6876d1 100644 (file)
@@ -191,12 +191,8 @@ static int icn8318_probe(struct i2c_client *client)
                return -ENOMEM;
 
        data->wake_gpio = devm_gpiod_get(dev, "wake", GPIOD_OUT_LOW);
-       if (IS_ERR(data->wake_gpio)) {
-               error = PTR_ERR(data->wake_gpio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Error getting wake gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(data->wake_gpio))
+               return dev_err_probe(dev, PTR_ERR(data->wake_gpio), "Error getting wake gpio\n");
 
        input = devm_input_allocate_device(dev);
        if (!input)
index 967ecde..ea38951 100644 (file)
@@ -258,12 +258,8 @@ static int cy8ctma140_probe(struct i2c_client *client)
        ts->regulators[1].supply = "vdd";
        error = devm_regulator_bulk_get(dev, ARRAY_SIZE(ts->regulators),
                                      ts->regulators);
-       if (error) {
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get regulators %d\n",
-                               error);
-               return error;
-       }
+       if (error)
+               return dev_err_probe(dev, error, "Failed to get regulators\n");
 
        error = cy8ctma140_power_up(ts);
        if (error)
index b461ded..db5a885 100644 (file)
@@ -18,8 +18,8 @@
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <asm/unaligned.h>
 
index 795c7da..457d533 100644 (file)
@@ -1168,13 +1168,9 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client)
        tsdata->max_support_points = chip_data->max_support_points;
 
        tsdata->vcc = devm_regulator_get(&client->dev, "vcc");
-       if (IS_ERR(tsdata->vcc)) {
-               error = PTR_ERR(tsdata->vcc);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "failed to request regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(tsdata->vcc))
+               return dev_err_probe(&client->dev, PTR_ERR(tsdata->vcc),
+                                    "failed to request regulator\n");
 
        tsdata->iovcc = devm_regulator_get(&client->dev, "iovcc");
        if (IS_ERR(tsdata->iovcc)) {
index fd8724a..cc3103b 100644 (file)
@@ -264,12 +264,8 @@ static int ektf2127_probe(struct i2c_client *client)
 
        /* This requests the gpio *and* turns on the touchscreen controller */
        ts->power_gpios = devm_gpiod_get(dev, "power", GPIOD_OUT_HIGH);
-       if (IS_ERR(ts->power_gpios)) {
-               error = PTR_ERR(ts->power_gpios);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Error getting power gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->power_gpios))
+               return dev_err_probe(dev, PTR_ERR(ts->power_gpios), "Error getting power gpio\n");
 
        input = devm_input_allocate_device(dev);
        if (!input)
index 2da1db6..a1af3de 100644 (file)
@@ -1438,24 +1438,14 @@ static int elants_i2c_probe(struct i2c_client *client)
        i2c_set_clientdata(client, ts);
 
        ts->vcc33 = devm_regulator_get(&client->dev, "vcc33");
-       if (IS_ERR(ts->vcc33)) {
-               error = PTR_ERR(ts->vcc33);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get 'vcc33' regulator: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(ts->vcc33))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->vcc33),
+                                    "Failed to get 'vcc33' regulator\n");
 
        ts->vccio = devm_regulator_get(&client->dev, "vccio");
-       if (IS_ERR(ts->vccio)) {
-               error = PTR_ERR(ts->vccio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get 'vccio' regulator: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(ts->vccio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->vccio),
+                                    "Failed to get 'vccio' regulator\n");
 
        ts->reset_gpio = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_HIGH);
        if (IS_ERR(ts->reset_gpio)) {
index 4af4c1e..4c0d99a 100644 (file)
@@ -7,6 +7,7 @@
  * minimal implementation based on egalax_ts.c and egalax_i2c.c
  */
 
+#include <linux/acpi.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -18,6 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/regulator/consumer.h>
 #include <linux/sizes.h>
 #include <linux/timer.h>
 #include <asm/unaligned.h>
@@ -360,6 +362,12 @@ static int exc3000_probe(struct i2c_client *client)
        if (IS_ERR(data->reset))
                return PTR_ERR(data->reset);
 
+       /* For proper reset sequence, enable power while reset asserted */
+       error = devm_regulator_get_enable(&client->dev, "vdd");
+       if (error && error != -ENODEV)
+               return dev_err_probe(&client->dev, error,
+                                    "failed to request vdd regulator\n");
+
        if (data->reset) {
                msleep(EXC3000_RESET_MS);
                gpiod_set_value_cansleep(data->reset, 0);
@@ -454,10 +462,19 @@ static const struct of_device_id exc3000_of_match[] = {
 MODULE_DEVICE_TABLE(of, exc3000_of_match);
 #endif
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id exc3000_acpi_match[] = { /* ACPI IDs for this driver; only built when CONFIG_ACPI is set */
+       { "EGA00001", .driver_data = (kernel_ulong_t)&exc3000_info[EETI_EXC80H60] }, /* driver_data carries a pointer to the EXC80H60 info entry */
+       { } /* empty last entry */
+};
+MODULE_DEVICE_TABLE(acpi, exc3000_acpi_match);
+#endif
+
 static struct i2c_driver exc3000_driver = {
        .driver = {
                .name   = "exc3000",
                .of_match_table = of_match_ptr(exc3000_of_match),
+               .acpi_match_table = ACPI_PTR(exc3000_acpi_match),
        },
        .id_table       = exc3000_id,
        .probe          = exc3000_probe,
index f5aa240..da9954d 100644 (file)
@@ -935,7 +935,6 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts)
  */
 static int goodix_get_gpio_config(struct goodix_ts_data *ts)
 {
-       int error;
        struct device *dev;
        struct gpio_desc *gpiod;
        bool added_acpi_mappings = false;
@@ -951,33 +950,20 @@ static int goodix_get_gpio_config(struct goodix_ts_data *ts)
        ts->gpiod_rst_flags = GPIOD_IN;
 
        ts->avdd28 = devm_regulator_get(dev, "AVDD28");
-       if (IS_ERR(ts->avdd28)) {
-               error = PTR_ERR(ts->avdd28);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev,
-                               "Failed to get AVDD28 regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->avdd28))
+               return dev_err_probe(dev, PTR_ERR(ts->avdd28), "Failed to get AVDD28 regulator\n");
 
        ts->vddio = devm_regulator_get(dev, "VDDIO");
-       if (IS_ERR(ts->vddio)) {
-               error = PTR_ERR(ts->vddio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev,
-                               "Failed to get VDDIO regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->vddio))
+               return dev_err_probe(dev, PTR_ERR(ts->vddio), "Failed to get VDDIO regulator\n");
 
 retry_get_irq_gpio:
        /* Get the interrupt GPIO pin number */
        gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_INT_NAME, GPIOD_IN);
-       if (IS_ERR(gpiod)) {
-               error = PTR_ERR(gpiod);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get %s GPIO: %d\n",
-                               GOODIX_GPIO_INT_NAME, error);
-               return error;
-       }
+       if (IS_ERR(gpiod))
+               return dev_err_probe(dev, PTR_ERR(gpiod), "Failed to get %s GPIO\n",
+                                    GOODIX_GPIO_INT_NAME);
+
        if (!gpiod && has_acpi_companion(dev) && !added_acpi_mappings) {
                added_acpi_mappings = true;
                if (goodix_add_acpi_gpio_mappings(ts) == 0)
@@ -988,13 +974,9 @@ retry_get_irq_gpio:
 
        /* Get the reset line GPIO pin number */
        gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, ts->gpiod_rst_flags);
-       if (IS_ERR(gpiod)) {
-               error = PTR_ERR(gpiod);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get %s GPIO: %d\n",
-                               GOODIX_GPIO_RST_NAME, error);
-               return error;
-       }
+       if (IS_ERR(gpiod))
+               return dev_err_probe(dev, PTR_ERR(gpiod), "Failed to get %s GPIO\n",
+                                    GOODIX_GPIO_RST_NAME);
 
        ts->gpiod_rst = gpiod;
 
@@ -1517,6 +1499,7 @@ MODULE_DEVICE_TABLE(i2c, goodix_ts_id);
 static const struct acpi_device_id goodix_acpi_match[] = { /* ACPI IDs exported via MODULE_DEVICE_TABLE(acpi, ...) */
        { "GDIX1001", 0 },
        { "GDIX1002", 0 },
+       { "GDX9110", 0 }, /* NOTE(review): "GDX", not "GDIX" - presumably the ID as reported by firmware; confirm */
        { } /* empty last entry */
 };
 MODULE_DEVICE_TABLE(acpi, goodix_acpi_match);
index f7cd773..ad6828e 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/input/mt.h>
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
index 0aa9d64..b4768b6 100644 (file)
@@ -23,8 +23,8 @@
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
 
diff --git a/drivers/input/touchscreen/iqs7211.c b/drivers/input/touchscreen/iqs7211.c
new file mode 100644 (file)
index 0000000..dc084f8
--- /dev/null
@@ -0,0 +1,2557 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Azoteq IQS7210A/7211A/E Trackpad/Touchscreen Controller
+ *
+ * Copyright (C) 2023 Jeff LaBundy <jeff@labundy.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+#define IQS7211_PROD_NUM                       0x00
+
+#define IQS7211_EVENT_MASK_ALL                 GENMASK(14, 8) /* bits 14..8 inclusive; covers every single-bit mask below */
+#define IQS7211_EVENT_MASK_ALP                 BIT(13)
+#define IQS7211_EVENT_MASK_BTN                 BIT(12)
+#define IQS7211_EVENT_MASK_ATI                 BIT(11)
+#define IQS7211_EVENT_MASK_MOVE                        BIT(10)
+#define IQS7211_EVENT_MASK_GSTR                        BIT(9)
+#define IQS7211_EVENT_MODE                     BIT(8)
+
+#define IQS7211_COMMS_ERROR                    0xEEEE
+#define IQS7211_COMMS_RETRY_MS                 50
+#define IQS7211_COMMS_SLEEP_US                 100
+#define IQS7211_COMMS_TIMEOUT_US               (100 * USEC_PER_MSEC) /* 100 ms expressed in microseconds */
+#define IQS7211_RESET_TIMEOUT_MS               150
+#define IQS7211_START_TIMEOUT_US               (1 * USEC_PER_SEC) /* 1 s expressed in microseconds */
+
+#define IQS7211_NUM_RETRIES                    5
+#define IQS7211_NUM_CRX                                8
+#define IQS7211_MAX_CTX                                13 /* iqs7211_devs[] sets num_ctx to this value or to one less */
+
+#define IQS7211_MAX_CONTACTS                   2
+#define IQS7211_MAX_CYCLES                     21
+
+/*
+ * The following delay is used during instances that must wait for the open-
+ * drain RDY pin to settle. Its value is calculated as 5*R*C, where R and C
+ * represent typical datasheet values of 4.7k and 100 nF, respectively.
+ */
+#define iqs7211_irq_wait()                     usleep_range(2500, 2600)
+
+enum iqs7211_dev_id { /* supported parts; values index iqs7211_devs[] and the device dimension of iqs7211_prop_desc.reg_addr */
+       IQS7210A,
+       IQS7211A,
+       IQS7211E,
+};
+
+enum iqs7211_comms_mode { /* communication mode selector; NOTE(review): per-mode semantics are set by users of this enum - confirm there */
+       IQS7211_COMMS_MODE_WAIT,
+       IQS7211_COMMS_MODE_FREE,
+       IQS7211_COMMS_MODE_FORCE,
+};
+
+struct iqs7211_reg_field_desc { /* one register field: 8-bit address plus 16-bit mask and value */
+       struct list_head list; /* allows instances to be chained on a list */
+       u8 addr;
+       u16 mask;
+       u16 val; /* presumably the bits to apply under 'mask' at 'addr' - TODO confirm against the users */
+};
+
+enum iqs7211_reg_key_id { /* key selector stored in the reg_key member of event and property descriptors */
+       IQS7211_REG_KEY_NONE, /* value 0, so descriptors that omit .reg_key get this key */
+       IQS7211_REG_KEY_PROX,
+       IQS7211_REG_KEY_TOUCH,
+       IQS7211_REG_KEY_TAP,
+       IQS7211_REG_KEY_HOLD,
+       IQS7211_REG_KEY_PALM,
+       IQS7211_REG_KEY_AXIAL_X,
+       IQS7211_REG_KEY_AXIAL_Y,
+       IQS7211_REG_KEY_RESERVED
+};
+
+enum iqs7211_reg_grp_id { /* register groups; values index the per-group arrays sized by IQS7211_NUM_REG_GRPS */
+       IQS7211_REG_GRP_TP,
+       IQS7211_REG_GRP_BTN,
+       IQS7211_REG_GRP_ALP,
+       IQS7211_REG_GRP_SYS,
+       IQS7211_NUM_REG_GRPS /* number of groups above; keep last */
+};
+
+static const char * const iqs7211_reg_grp_names[IQS7211_NUM_REG_GRPS] = { /* name per register group; IQS7211_REG_GRP_SYS has no entry and stays NULL */
+       [IQS7211_REG_GRP_TP] = "trackpad",
+       [IQS7211_REG_GRP_BTN] = "button",
+       [IQS7211_REG_GRP_ALP] = "alp",
+};
+
+static const u16 iqs7211_reg_grp_masks[IQS7211_NUM_REG_GRPS] = { /* event mask per register group; IQS7211_REG_GRP_SYS has no entry and stays 0 */
+       [IQS7211_REG_GRP_TP] = IQS7211_EVENT_MASK_GSTR, /* the trackpad group uses the gesture mask */
+       [IQS7211_REG_GRP_BTN] = IQS7211_EVENT_MASK_BTN,
+       [IQS7211_REG_GRP_ALP] = IQS7211_EVENT_MASK_ALP,
+};
+
+struct iqs7211_event_desc { /* one entry of the per-device *_kp_events tables */
+       const char *name; /* "event-..." string; left NULL by the first entry of each table */
+       u16 mask; /* NOTE(review): looks like the status bit(s) reporting the event - confirm */
+       u16 enable; /* NOTE(review): looks like the bit(s) that enable the event - confirm */
+       enum iqs7211_reg_grp_id reg_grp;
+       enum iqs7211_reg_key_id reg_key;
+};
+
+static const struct iqs7211_event_desc iqs7210a_kp_events[] = { /* event table referenced by iqs7211_devs[IQS7210A] */
+       { /* unnamed ALP-group entry; .name stays NULL and .reg_key stays IQS7211_REG_KEY_NONE */
+               .mask = BIT(10),
+               .enable = BIT(13) | BIT(12),
+               .reg_grp = IQS7211_REG_GRP_ALP,
+       },
+       {
+               .name = "event-prox",
+               .mask = BIT(2),
+               .enable = BIT(5) | BIT(4),
+               .reg_grp = IQS7211_REG_GRP_BTN,
+               .reg_key = IQS7211_REG_KEY_PROX,
+       },
+       {
+               .name = "event-touch",
+               .mask = BIT(3),
+               .enable = BIT(5) | BIT(4), /* same enable bits as "event-prox" */
+               .reg_grp = IQS7211_REG_GRP_BTN,
+               .reg_key = IQS7211_REG_KEY_TOUCH,
+       },
+       {
+               .name = "event-tap",
+               .mask = BIT(0),
+               .enable = BIT(0),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-hold",
+               .mask = BIT(1),
+               .enable = BIT(1),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-x-neg",
+               .mask = BIT(2),
+               .enable = BIT(2),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-x-pos",
+               .mask = BIT(3),
+               .enable = BIT(3),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-y-pos",
+               .mask = BIT(4),
+               .enable = BIT(4),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+       {
+               .name = "event-swipe-y-neg",
+               .mask = BIT(5),
+               .enable = BIT(5),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+};
+
+static const struct iqs7211_event_desc iqs7211a_kp_events[] = { /* event table referenced by iqs7211_devs[IQS7211A]; has no button-group entries */
+       { /* unnamed ALP-group entry; .name, .enable and .reg_key are left zero */
+               .mask = BIT(14),
+               .reg_grp = IQS7211_REG_GRP_ALP,
+       },
+       {
+               .name = "event-tap",
+               .mask = BIT(0),
+               .enable = BIT(0),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-hold",
+               .mask = BIT(1),
+               .enable = BIT(1),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-x-neg",
+               .mask = BIT(2),
+               .enable = BIT(2),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-x-pos",
+               .mask = BIT(3),
+               .enable = BIT(3),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-y-pos",
+               .mask = BIT(4),
+               .enable = BIT(4),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+       {
+               .name = "event-swipe-y-neg",
+               .mask = BIT(5),
+               .enable = BIT(5),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+};
+
+static const struct iqs7211_event_desc iqs7211e_kp_events[] = { /* event table referenced by iqs7211_devs[IQS7211E]; has no button-group entries */
+       { /* unnamed ALP-group entry; .name, .enable and .reg_key are left zero */
+               .mask = BIT(14),
+               .reg_grp = IQS7211_REG_GRP_ALP,
+       },
+       {
+               .name = "event-tap",
+               .mask = BIT(0),
+               .enable = BIT(0),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-tap-double",
+               .mask = BIT(1),
+               .enable = BIT(1),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-tap-triple",
+               .mask = BIT(2),
+               .enable = BIT(2),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-hold",
+               .mask = BIT(3),
+               .enable = BIT(3),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-palm",
+               .mask = BIT(4),
+               .enable = BIT(4),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_PALM,
+       },
+       {
+               .name = "event-swipe-x-pos", /* unlike the IQS7210A/IQS7211A tables, "pos" takes the lower bit of each swipe pair here */
+               .mask = BIT(8),
+               .enable = BIT(8),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-x-neg",
+               .mask = BIT(9),
+               .enable = BIT(9),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-y-pos",
+               .mask = BIT(10),
+               .enable = BIT(10),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+       {
+               .name = "event-swipe-y-neg",
+               .mask = BIT(11),
+               .enable = BIT(11),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+       {
+               .name = "event-swipe-x-pos-hold", /* the four swipe-and-hold events use the HOLD key, not the axial keys */
+               .mask = BIT(12),
+               .enable = BIT(12),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-x-neg-hold",
+               .mask = BIT(13),
+               .enable = BIT(13),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-y-pos-hold",
+               .mask = BIT(14),
+               .enable = BIT(14),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-y-neg-hold",
+               .mask = BIT(15),
+               .enable = BIT(15),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+};
+
+struct iqs7211_dev_desc { /* per-device constants; one instance per enum iqs7211_dev_id value in iqs7211_devs[] */
+       const char *tp_name;
+       const char *kp_name;
+       u16 prod_num;
+       u16 show_reset;
+       u16 ati_error[IQS7211_NUM_REG_GRPS]; /* indexed by enum iqs7211_reg_grp_id */
+       u16 ati_start[IQS7211_NUM_REG_GRPS]; /* indexed by enum iqs7211_reg_grp_id */
+       u16 suspend;
+       u16 ack_reset;
+       u16 comms_end;
+       u16 comms_req;
+       int charge_shift;
+       int info_offs;
+       int gesture_offs;
+       int contact_offs;
+       u8 sys_stat; /* NOTE(review): the u8 members from here to cycle_alloc look like register addresses - confirm */
+       u8 sys_ctrl;
+       u8 alp_config;
+       u8 tp_config;
+       u8 exp_file;
+       u8 kp_enable[IQS7211_NUM_REG_GRPS]; /* indexed by enum iqs7211_reg_grp_id */
+       u8 gesture_angle;
+       u8 rx_tx_map;
+       u8 cycle_alloc[2];
+       u8 cycle_limit[2];
+       const struct iqs7211_event_desc *kp_events; /* one of the *_kp_events tables */
+       int num_kp_events; /* ARRAY_SIZE() of the table kp_events points to */
+       int min_crx_alp;
+       int num_ctx; /* IQS7211_MAX_CTX or IQS7211_MAX_CTX - 1 in iqs7211_devs[] */
+};
+
+static const struct iqs7211_dev_desc iqs7211_devs[] = { /* device descriptors, indexed by enum iqs7211_dev_id; omitted members are zero */
+       [IQS7210A] = { /* only entry with button-group (IQS7211_REG_GRP_BTN) values and a non-zero min_crx_alp */
+               .tp_name = "iqs7210a_trackpad",
+               .kp_name = "iqs7210a_keys",
+               .prod_num = 944,
+               .show_reset = BIT(15),
+               .ati_error = {
+                       [IQS7211_REG_GRP_TP] = BIT(12),
+                       [IQS7211_REG_GRP_BTN] = BIT(0),
+                       [IQS7211_REG_GRP_ALP] = BIT(8),
+               },
+               .ati_start = {
+                       [IQS7211_REG_GRP_TP] = BIT(13),
+                       [IQS7211_REG_GRP_BTN] = BIT(1),
+                       [IQS7211_REG_GRP_ALP] = BIT(9),
+               },
+               .suspend = BIT(11),
+               .ack_reset = BIT(7),
+               .comms_end = BIT(2),
+               .comms_req = BIT(1),
+               .charge_shift = 4,
+               .info_offs = 0,
+               .gesture_offs = 1,
+               .contact_offs = 4,
+               .sys_stat = 0x0A,
+               .sys_ctrl = 0x35,
+               .alp_config = 0x39,
+               .tp_config = 0x4E,
+               .exp_file = 0x57,
+               .kp_enable = {
+                       [IQS7211_REG_GRP_TP] = 0x58,
+                       [IQS7211_REG_GRP_BTN] = 0x37,
+                       [IQS7211_REG_GRP_ALP] = 0x37, /* same value as the button group */
+               },
+               .gesture_angle = 0x5F,
+               .rx_tx_map = 0x60,
+               .cycle_alloc = { 0x66, 0x75, },
+               .cycle_limit = { 10, 6, },
+               .kp_events = iqs7210a_kp_events,
+               .num_kp_events = ARRAY_SIZE(iqs7210a_kp_events),
+               .min_crx_alp = 4,
+               .num_ctx = IQS7211_MAX_CTX - 1,
+       },
+       [IQS7211A] = { /* .suspend, .comms_end and .min_crx_alp are not set here and stay zero */
+               .tp_name = "iqs7211a_trackpad",
+               .kp_name = "iqs7211a_keys",
+               .prod_num = 763,
+               .show_reset = BIT(7),
+               .ati_error = {
+                       [IQS7211_REG_GRP_TP] = BIT(3),
+                       [IQS7211_REG_GRP_ALP] = BIT(5),
+               },
+               .ati_start = {
+                       [IQS7211_REG_GRP_TP] = BIT(5),
+                       [IQS7211_REG_GRP_ALP] = BIT(6),
+               },
+               .ack_reset = BIT(7),
+               .comms_req = BIT(4),
+               .charge_shift = 0,
+               .info_offs = 0,
+               .gesture_offs = 1,
+               .contact_offs = 4,
+               .sys_stat = 0x10,
+               .sys_ctrl = 0x50,
+               .tp_config = 0x60,
+               .alp_config = 0x72,
+               .exp_file = 0x74,
+               .kp_enable = {
+                       [IQS7211_REG_GRP_TP] = 0x80,
+               },
+               .gesture_angle = 0x87,
+               .rx_tx_map = 0x90,
+               .cycle_alloc = { 0xA0, 0xB0, },
+               .cycle_limit = { 10, 8, },
+               .kp_events = iqs7211a_kp_events,
+               .num_kp_events = ARRAY_SIZE(iqs7211a_kp_events),
+               .num_ctx = IQS7211_MAX_CTX - 1,
+       },
+       [IQS7211E] = { /* .min_crx_alp is not set here and stays zero; only entry with num_ctx == IQS7211_MAX_CTX */
+               .tp_name = "iqs7211e_trackpad",
+               .kp_name = "iqs7211e_keys",
+               .prod_num = 1112,
+               .show_reset = BIT(7),
+               .ati_error = {
+                       [IQS7211_REG_GRP_TP] = BIT(3),
+                       [IQS7211_REG_GRP_ALP] = BIT(5),
+               },
+               .ati_start = {
+                       [IQS7211_REG_GRP_TP] = BIT(5),
+                       [IQS7211_REG_GRP_ALP] = BIT(6),
+               },
+               .suspend = BIT(11),
+               .ack_reset = BIT(7),
+               .comms_end = BIT(6),
+               .comms_req = BIT(4),
+               .charge_shift = 0,
+               .info_offs = 1,
+               .gesture_offs = 0,
+               .contact_offs = 2,
+               .sys_stat = 0x0E,
+               .sys_ctrl = 0x33,
+               .tp_config = 0x41,
+               .alp_config = 0x36,
+               .exp_file = 0x4A,
+               .kp_enable = {
+                       [IQS7211_REG_GRP_TP] = 0x4B,
+               },
+               .gesture_angle = 0x55,
+               .rx_tx_map = 0x56,
+               .cycle_alloc = { 0x5D, 0x6C, },
+               .cycle_limit = { 10, 11, },
+               .kp_events = iqs7211e_kp_events,
+               .num_kp_events = ARRAY_SIZE(iqs7211e_kp_events),
+               .num_ctx = IQS7211_MAX_CTX,
+       },
+};
+
+struct iqs7211_prop_desc { /* one entry of the iqs7211_props[] table */
+       const char *name; /* "azoteq,..." property name */
+       enum iqs7211_reg_key_id reg_key;
+       u8 reg_addr[IQS7211_NUM_REG_GRPS][ARRAY_SIZE(iqs7211_devs)]; /* indexed [register group][device id] */
+       int reg_shift;
+       int reg_width;
+       int val_pitch;
+       int val_min;
+       int val_max;
+       const char *label; /* human-readable description of the property */
+};
+
+static const struct iqs7211_prop_desc iqs7211_props[] = {
+       {
+               .name = "azoteq,ati-frac-div-fine",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x1E,
+                               [IQS7211A] = 0x30,
+                               [IQS7211E] = 0x21,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x22,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x23,
+                               [IQS7211A] = 0x36,
+                               [IQS7211E] = 0x25,
+                       },
+               },
+               .reg_shift = 9,
+               .reg_width = 5,
+               .label = "ATI fine fractional divider",
+       },
+       {
+               .name = "azoteq,ati-frac-mult-coarse",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x1E,
+                               [IQS7211A] = 0x30,
+                               [IQS7211E] = 0x21,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x22,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x23,
+                               [IQS7211A] = 0x36,
+                               [IQS7211E] = 0x25,
+                       },
+               },
+               .reg_shift = 5,
+               .reg_width = 4,
+               .label = "ATI coarse fractional multiplier",
+       },
+       {
+               .name = "azoteq,ati-frac-div-coarse",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x1E,
+                               [IQS7211A] = 0x30,
+                               [IQS7211E] = 0x21,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x22,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x23,
+                               [IQS7211A] = 0x36,
+                               [IQS7211E] = 0x25,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 5,
+               .label = "ATI coarse fractional divider",
+       },
+       {
+               .name = "azoteq,ati-comp-div",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x1F,
+                               [IQS7211E] = 0x22,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x24,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7211E] = 0x26,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .val_max = 31,
+               .label = "ATI compensation divider",
+       },
+       {
+               .name = "azoteq,ati-comp-div",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x24,
+                       },
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_max = 31,
+               .label = "ATI compensation divider",
+       },
+       {
+               .name = "azoteq,ati-comp-div",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7211A] = 0x31,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7211A] = 0x37,
+                       },
+               },
+               .val_max = 31,
+               .label = "ATI compensation divider",
+       },
+       {
+               .name = "azoteq,ati-target",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x20,
+                               [IQS7211A] = 0x32,
+                               [IQS7211E] = 0x23,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x27,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x28,
+                               [IQS7211A] = 0x38,
+                               [IQS7211E] = 0x27,
+                       },
+               },
+               .label = "ATI target",
+       },
+       {
+               .name = "azoteq,ati-base",
+               .reg_addr[IQS7211_REG_GRP_ALP] = {
+                       [IQS7210A] = 0x26,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 8,
+               .label = "ATI base",
+       },
+       {
+               .name = "azoteq,ati-base",
+               .reg_addr[IQS7211_REG_GRP_BTN] = {
+                       [IQS7210A] = 0x26,
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .val_pitch = 8,
+               .label = "ATI base",
+       },
+       {
+               .name = "azoteq,rate-active-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x29,
+                       [IQS7211A] = 0x40,
+                       [IQS7211E] = 0x28,
+               },
+               .label = "active mode report rate",
+       },
+       {
+               .name = "azoteq,rate-touch-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2A,
+                       [IQS7211A] = 0x41,
+                       [IQS7211E] = 0x29,
+               },
+               .label = "idle-touch mode report rate",
+       },
+       {
+               .name = "azoteq,rate-idle-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2B,
+                       [IQS7211A] = 0x42,
+                       [IQS7211E] = 0x2A,
+               },
+               .label = "idle mode report rate",
+       },
+       {
+               .name = "azoteq,rate-lp1-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2C,
+                       [IQS7211A] = 0x43,
+                       [IQS7211E] = 0x2B,
+               },
+               .label = "low-power mode 1 report rate",
+       },
+       {
+               .name = "azoteq,rate-lp2-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2D,
+                       [IQS7211A] = 0x44,
+                       [IQS7211E] = 0x2C,
+               },
+               .label = "low-power mode 2 report rate",
+       },
+       {
+               .name = "azoteq,timeout-active-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2E,
+                       [IQS7211A] = 0x45,
+                       [IQS7211E] = 0x2D,
+               },
+               .val_pitch = 1000,
+               .label = "active mode timeout",
+       },
+       {
+               .name = "azoteq,timeout-touch-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2F,
+                       [IQS7211A] = 0x46,
+                       [IQS7211E] = 0x2E,
+               },
+               .val_pitch = 1000,
+               .label = "idle-touch mode timeout",
+       },
+       {
+               .name = "azoteq,timeout-idle-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x30,
+                       [IQS7211A] = 0x47,
+                       [IQS7211E] = 0x2F,
+               },
+               .val_pitch = 1000,
+               .label = "idle mode timeout",
+       },
+       {
+               .name = "azoteq,timeout-lp1-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x31,
+                       [IQS7211A] = 0x48,
+                       [IQS7211E] = 0x30,
+               },
+               .val_pitch = 1000,
+               .label = "low-power mode 1 timeout",
+       },
+       {
+               .name = "azoteq,timeout-lp2-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x32,
+                       [IQS7211E] = 0x31,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "trackpad reference value update rate",
+       },
+       {
+               .name = "azoteq,timeout-lp2-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7211A] = 0x49,
+               },
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "trackpad reference value update rate",
+       },
+       {
+               .name = "azoteq,timeout-ati-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x32,
+                       [IQS7211E] = 0x31,
+               },
+               .reg_width = 8,
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "ATI error timeout",
+       },
+       {
+               .name = "azoteq,timeout-ati-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7211A] = 0x35,
+               },
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "ATI error timeout",
+       },
+       {
+               .name = "azoteq,timeout-comms-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x33,
+                       [IQS7211A] = 0x4A,
+                       [IQS7211E] = 0x32,
+               },
+               .label = "communication timeout",
+       },
+       {
+               .name = "azoteq,timeout-press-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x34,
+               },
+               .reg_width = 8,
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "press timeout",
+       },
+       {
+               .name = "azoteq,ati-mode",
+               .reg_addr[IQS7211_REG_GRP_ALP] = {
+                       [IQS7210A] = 0x37,
+               },
+               .reg_shift = 15,
+               .reg_width = 1,
+               .label = "ATI mode",
+       },
+       {
+               .name = "azoteq,ati-mode",
+               .reg_addr[IQS7211_REG_GRP_BTN] = {
+                       [IQS7210A] = 0x37,
+               },
+               .reg_shift = 7,
+               .reg_width = 1,
+               .label = "ATI mode",
+       },
+       {
+               .name = "azoteq,sense-mode",
+               .reg_addr[IQS7211_REG_GRP_ALP] = {
+                       [IQS7210A] = 0x37,
+                       [IQS7211A] = 0x72,
+                       [IQS7211E] = 0x36,
+               },
+               .reg_shift = 8,
+               .reg_width = 1,
+               .label = "sensing mode",
+       },
+       {
+               .name = "azoteq,sense-mode",
+               .reg_addr[IQS7211_REG_GRP_BTN] = {
+                       [IQS7210A] = 0x37,
+               },
+               .reg_shift = 0,
+               .reg_width = 2,
+               .val_max = 2,
+               .label = "sensing mode",
+       },
+       {
+               .name = "azoteq,fosc-freq",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x38,
+                       [IQS7211A] = 0x52,
+                       [IQS7211E] = 0x35,
+               },
+               .reg_shift = 4,
+               .reg_width = 1,
+               .label = "core clock frequency selection",
+       },
+       {
+               .name = "azoteq,fosc-trim",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x38,
+                       [IQS7211A] = 0x52,
+                       [IQS7211E] = 0x35,
+               },
+               .reg_shift = 0,
+               .reg_width = 4,
+               .label = "core clock frequency trim",
+       },
+       {
+               .name = "azoteq,touch-exit",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x3B,
+                               [IQS7211A] = 0x53,
+                               [IQS7211E] = 0x38,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3E,
+                       },
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "touch exit factor",
+       },
+       {
+               .name = "azoteq,touch-enter",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x3B,
+                               [IQS7211A] = 0x53,
+                               [IQS7211E] = 0x38,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3E,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "touch entrance factor",
+       },
+       {
+               .name = "azoteq,thresh",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3C,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x3D,
+                               [IQS7211A] = 0x54,
+                               [IQS7211E] = 0x39,
+                       },
+               },
+               .label = "threshold",
+       },
+       {
+               .name = "azoteq,debounce-exit",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3F,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x40,
+                               [IQS7211A] = 0x56,
+                               [IQS7211E] = 0x3A,
+                       },
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "debounce exit factor",
+       },
+       {
+               .name = "azoteq,debounce-enter",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3F,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x40,
+                               [IQS7211A] = 0x56,
+                               [IQS7211E] = 0x3A,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "debounce entrance factor",
+       },
+       {
+               .name = "azoteq,conv-frac",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x48,
+                               [IQS7211A] = 0x58,
+                               [IQS7211E] = 0x3D,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x49,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x4A,
+                               [IQS7211A] = 0x59,
+                               [IQS7211E] = 0x3E,
+                       },
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "conversion frequency fractional divider",
+       },
+       {
+               .name = "azoteq,conv-period",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x48,
+                               [IQS7211A] = 0x58,
+                               [IQS7211E] = 0x3D,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x49,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x4A,
+                               [IQS7211A] = 0x59,
+                               [IQS7211E] = 0x3E,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "conversion period",
+       },
+       {
+               .name = "azoteq,thresh",
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x55,
+                       [IQS7211A] = 0x67,
+                       [IQS7211E] = 0x48,
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "threshold",
+       },
+       {
+               .name = "azoteq,contact-split",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x55,
+                       [IQS7211A] = 0x67,
+                       [IQS7211E] = 0x48,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "contact split factor",
+       },
+       {
+               .name = "azoteq,trim-x",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x56,
+                       [IQS7211E] = 0x49,
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "horizontal trim width",
+       },
+       {
+               .name = "azoteq,trim-x",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7211A] = 0x68,
+               },
+               .label = "horizontal trim width",
+       },
+       {
+               .name = "azoteq,trim-y",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x56,
+                       [IQS7211E] = 0x49,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "vertical trim height",
+       },
+       {
+               .name = "azoteq,trim-y",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7211A] = 0x69,
+               },
+               .label = "vertical trim height",
+       },
+       {
+               .name = "azoteq,gesture-max-ms",
+               .reg_key = IQS7211_REG_KEY_TAP,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x59,
+                       [IQS7211A] = 0x81,
+                       [IQS7211E] = 0x4C,
+               },
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-mid-ms",
+               .reg_key = IQS7211_REG_KEY_TAP,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7211E] = 0x4D,
+               },
+               .label = "repeated gesture time",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_key = IQS7211_REG_KEY_TAP,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5A,
+                       [IQS7211A] = 0x82,
+                       [IQS7211E] = 0x4E,
+               },
+               .label = "gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_key = IQS7211_REG_KEY_HOLD,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5A,
+                       [IQS7211A] = 0x82,
+                       [IQS7211E] = 0x4E,
+               },
+               .label = "gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-min-ms",
+               .reg_key = IQS7211_REG_KEY_HOLD,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5B,
+                       [IQS7211A] = 0x83,
+                       [IQS7211E] = 0x4F,
+               },
+               .label = "minimum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-max-ms",
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5C,
+                       [IQS7211A] = 0x84,
+                       [IQS7211E] = 0x50,
+               },
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-max-ms",
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5C,
+                       [IQS7211A] = 0x84,
+                       [IQS7211E] = 0x50,
+               },
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5D,
+                       [IQS7211A] = 0x85,
+                       [IQS7211E] = 0x51,
+               },
+               .label = "gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5E,
+                       [IQS7211A] = 0x86,
+                       [IQS7211E] = 0x52,
+               },
+               .label = "gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-dist-rep",
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7211E] = 0x53,
+               },
+               .label = "repeated gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-dist-rep",
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7211E] = 0x54,
+               },
+               .label = "repeated gesture distance",
+       },
+       {
+               .name = "azoteq,thresh",
+               .reg_key = IQS7211_REG_KEY_PALM,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7211E] = 0x55,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_max = 42,
+               .label = "threshold",
+       },
+};
+
+/*
+ * Lookup table indexed by the "azoteq,gesture-angle" property value; the
+ * selected entry is the value queued for the gesture angle register.
+ *
+ * NOTE(review): the entries appear to equal 64 * tan(index in degrees),
+ * rounded, for indices 0 through 75 (e.g. index 45 maps to 0x40) -- confirm
+ * against the datasheet.
+ */
+static const u8 iqs7211_gesture_angle[] = {
+       0x00, 0x01, 0x02, 0x03,
+       0x04, 0x06, 0x07, 0x08,
+       0x09, 0x0A, 0x0B, 0x0C,
+       0x0E, 0x0F, 0x10, 0x11,
+       0x12, 0x14, 0x15, 0x16,
+       0x17, 0x19, 0x1A, 0x1B,
+       0x1C, 0x1E, 0x1F, 0x21,
+       0x22, 0x23, 0x25, 0x26,
+       0x28, 0x2A, 0x2B, 0x2D,
+       0x2E, 0x30, 0x32, 0x34,
+       0x36, 0x38, 0x3A, 0x3C,
+       0x3E, 0x40, 0x42, 0x45,
+       0x47, 0x4A, 0x4C, 0x4F,
+       0x52, 0x55, 0x58, 0x5B,
+       0x5F, 0x63, 0x66, 0x6B,
+       0x6F, 0x73, 0x78, 0x7E,
+       0x83, 0x89, 0x90, 0x97,
+       0x9E, 0xA7, 0xB0, 0xBA,
+       0xC5, 0xD1, 0xDF, 0xEF,
+};
+
+/*
+ * Version information as read from the device in one burst starting at
+ * IQS7211_PROD_NUM. All members are little-endian and the layout is packed
+ * because the structure is filled directly from the bus.
+ */
+struct iqs7211_ver_info {
+       __le16 prod_num;        /* checked against dev_desc->prod_num */
+       __le16 major;
+       __le16 minor;
+       __le32 patch;
+} __packed;
+
+/*
+ * Packed, little-endian record holding a position, pressure and area.
+ *
+ * NOTE(review): presumably one record per reported contact; the code that
+ * fills and consumes it is outside this part of the file -- confirm.
+ */
+struct iqs7211_touch_data {
+       __le16 abs_x;
+       __le16 abs_y;
+       __le16 pressure;
+       __le16 area;
+} __packed;
+
+/*
+ * Trackpad configuration block. It is read from the register at
+ * dev_desc->tp_config when communication is established and written back
+ * to the same register during device initialization, so the layout is
+ * packed and multi-byte members are little-endian.
+ */
+struct iqs7211_tp_config {
+       u8 tp_settings;
+       u8 total_rx;
+       u8 total_tx;
+       u8 num_contacts;
+       __le16 max_x;
+       __le16 max_y;
+} __packed;
+
+/*
+ * Per-device driver state.
+ *
+ * reset_gpio may be NULL (no hard reset is performed in that case) and may
+ * be the same descriptor as irq_gpio when the device's RDY and MCLR signals
+ * share one pin.
+ */
+struct iqs7211_private {
+       const struct iqs7211_dev_desc *dev_desc;
+       struct gpio_desc *reset_gpio;
+       struct gpio_desc *irq_gpio;     /* RDY pin, polled for comms windows */
+       struct i2c_client *client;
+       struct input_dev *tp_idev;
+       struct input_dev *kp_idev;
+       struct iqs7211_ver_info ver_info;
+       struct iqs7211_tp_config tp_config;
+       struct touchscreen_properties prop;
+       /* register fields collected from properties, applied at init */
+       struct list_head reg_field_head;
+       enum iqs7211_comms_mode comms_init;     /* mode assumed after reset */
+       enum iqs7211_comms_mode comms_mode;     /* mode currently in effect */
+       unsigned int num_contacts;
+       unsigned int kp_code[ARRAY_SIZE(iqs7211e_kp_events)];
+       u8 rx_tx_map[IQS7211_MAX_CTX + 1];
+       /* written to the device only if the first byte is nonzero */
+       u8 cycle_alloc[2][33];
+       u8 exp_file[2];
+       u16 event_mask;         /* value kept in the word at sys_ctrl + 1 */
+       u16 ati_start;          /* written to sys_ctrl as the last init step */
+       u16 gesture_cache;
+};
+
+/*
+ * Sleep-poll the RDY GPIO until it reads nonzero or timeout_us elapses.
+ *
+ * A negative GPIO read also terminates the poll and is returned as-is;
+ * otherwise the result of the poll itself is returned.
+ */
+static int iqs7211_irq_poll(struct iqs7211_private *iqs7211, u64 timeout_us)
+{
+       struct gpio_desc *rdy_gpio = iqs7211->irq_gpio;
+       int level, ret;
+
+       ret = readx_poll_timeout(gpiod_get_value_cansleep, rdy_gpio, level,
+                                level != 0, IQS7211_COMMS_SLEEP_US,
+                                timeout_us);
+       if (level < 0)
+               return level;
+
+       return ret;
+}
+
+/*
+ * Pulse the reset GPIO and wait for the device to assert RDY afterwards.
+ *
+ * Returns 0 right away when no reset GPIO is available, otherwise the
+ * result of polling RDY for up to IQS7211_START_TIMEOUT_US.
+ */
+static int iqs7211_hard_reset(struct iqs7211_private *iqs7211)
+{
+       struct gpio_desc *reset = iqs7211->reset_gpio;
+       bool shared_pin;
+
+       if (!reset)
+               return 0;
+
+       shared_pin = reset == iqs7211->irq_gpio;
+
+       gpiod_set_value_cansleep(reset, 1);
+
+       /*
+        * With a shared RDY/MCLR pin, the longer delay makes sure the pin is
+        * sampled in between the device's own periodic assertions. This
+        * assumes the default communication timeout has not been overwritten
+        * in OTP memory.
+        */
+       if (shared_pin)
+               msleep(IQS7211_RESET_TIMEOUT_MS);
+       else
+               usleep_range(1000, 1100);
+
+       gpiod_set_value_cansleep(reset, 0);
+
+       if (shared_pin)
+               iqs7211_irq_wait();
+
+       return iqs7211_irq_poll(iqs7211, IQS7211_START_TIMEOUT_US);
+}
+
+/*
+ * Make sure a communication window is open before a bus transaction.
+ *
+ * What that takes depends on the current communication mode: waiting for
+ * RDY, nothing at all, or sending the force communication command and then
+ * waiting for RDY. Returns 0 on success or a negative error code.
+ */
+static int iqs7211_force_comms(struct iqs7211_private *iqs7211)
+{
+       u8 force_cmd[] = { 0xFF, };
+       int rdy, sent;
+
+       if (iqs7211->comms_mode == IQS7211_COMMS_MODE_WAIT)
+               return iqs7211_irq_poll(iqs7211, IQS7211_START_TIMEOUT_US);
+
+       if (iqs7211->comms_mode == IQS7211_COMMS_MODE_FREE)
+               return 0;
+
+       if (iqs7211->comms_mode != IQS7211_COMMS_MODE_FORCE)
+               return -EINVAL;
+
+       /*
+        * The device can only communicate while its interrupt (RDY) pin is
+        * asserted. Transfers attempted while RDY is deasserted are ACKed,
+        * but write data is ignored and read data comes back as 0xEE.
+        *
+        * Unsolicited communication therefore starts with a special force
+        * communication command, after which the device eventually asserts
+        * RDY and agrees to communicate.
+        *
+        * Whether the window was forced or opened by an interrupt, the
+        * device deasserts RDY on its own once it sees an I2C stop condition
+        * or a timeout expires.
+        */
+       rdy = gpiod_get_value_cansleep(iqs7211->irq_gpio);
+       if (rdy)
+               return rdy < 0 ? rdy : 0;
+
+       sent = i2c_master_send(iqs7211->client, force_cmd, sizeof(force_cmd));
+       if (sent < (int)sizeof(force_cmd)) {
+               msleep(IQS7211_COMMS_RETRY_MS);
+               return sent < 0 ? sent : -EIO;
+       }
+
+       iqs7211_irq_wait();
+
+       return iqs7211_irq_poll(iqs7211, IQS7211_COMMS_TIMEOUT_US);
+}
+
+/*
+ * Read val_len bytes starting at register reg into val.
+ *
+ * Each attempt first opens a communication window with
+ * iqs7211_force_comms() and then issues a two-message I2C transfer (write
+ * of the register address followed by the read). Up to IQS7211_NUM_RETRIES
+ * attempts are made; an attempt whose first two data bytes equal
+ * IQS7211_COMMS_ERROR counts as a failure and is retried.
+ *
+ * Returns 0 on success, or the negative error of the last attempt, which
+ * is also logged.
+ *
+ * NOTE(review): the IQS7211_COMMS_ERROR check always inspects two bytes of
+ * val, so callers are assumed to pass val_len >= 2 -- confirm.
+ */
+static int iqs7211_read_burst(struct iqs7211_private *iqs7211,
+                             u8 reg, void *val, u16 val_len)
+{
+       int ret, i;
+       struct i2c_client *client = iqs7211->client;
+       struct i2c_msg msg[] = {
+               {
+                       .addr = client->addr,
+                       .flags = 0,
+                       .len = sizeof(reg),
+                       .buf = &reg,
+               },
+               {
+                       .addr = client->addr,
+                       .flags = I2C_M_RD,
+                       .len = val_len,
+                       .buf = (u8 *)val,
+               },
+       };
+
+       /*
+        * The following loop protects against an edge case in which the RDY
+        * pin is automatically deasserted just as the read is initiated. In
+        * that case, the read must be retried using forced communication.
+        */
+       for (i = 0; i < IQS7211_NUM_RETRIES; i++) {
+               ret = iqs7211_force_comms(iqs7211);
+               if (ret < 0)
+                       continue;
+
+               ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+               if (ret < (int)ARRAY_SIZE(msg)) {
+                       /* a short transfer is reported as an I/O error */
+                       if (ret >= 0)
+                               ret = -EIO;
+
+                       msleep(IQS7211_COMMS_RETRY_MS);
+                       continue;
+               }
+
+               if (get_unaligned_le16(msg[1].buf) == IQS7211_COMMS_ERROR) {
+                       ret = -ENODATA;
+                       continue;
+               }
+
+               ret = 0;
+               break;
+       }
+
+       iqs7211_irq_wait();
+
+       if (ret < 0)
+               dev_err(&client->dev,
+                       "Failed to read from address 0x%02X: %d\n", reg, ret);
+
+       return ret;
+}
+
+/*
+ * Read one little-endian 16-bit register and store it in *val in CPU byte
+ * order. *val is left untouched if the read fails.
+ */
+static int iqs7211_read_word(struct iqs7211_private *iqs7211, u8 reg, u16 *val)
+{
+       __le16 raw;
+       int ret;
+
+       ret = iqs7211_read_burst(iqs7211, reg, &raw, sizeof(raw));
+       if (!ret)
+               *val = le16_to_cpu(raw);
+
+       return ret;
+}
+
+/*
+ * Write val_len bytes from val to the device starting at register reg.
+ *
+ * The register address and the payload are copied into one temporary
+ * buffer so that they are sent with a single i2c_master_send() call. Each
+ * attempt is preceded by iqs7211_force_comms(), and up to
+ * IQS7211_NUM_RETRIES attempts are made.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
+ * negative error of the last attempt, which is also logged.
+ */
+static int iqs7211_write_burst(struct iqs7211_private *iqs7211,
+                              u8 reg, const void *val, u16 val_len)
+{
+       int msg_len = sizeof(reg) + val_len;
+       int ret, i;
+       struct i2c_client *client = iqs7211->client;
+       u8 *msg_buf;
+
+       msg_buf = kzalloc(msg_len, GFP_KERNEL);
+       if (!msg_buf)
+               return -ENOMEM;
+
+       *msg_buf = reg;
+       memcpy(msg_buf + sizeof(reg), val, val_len);
+
+       /*
+        * The following loop protects against an edge case in which the RDY
+        * pin is automatically asserted just before the force communication
+        * command is sent.
+        *
+        * In that case, the subsequent I2C stop condition tricks the device
+        * into preemptively deasserting the RDY pin and the command must be
+        * sent again.
+        */
+       for (i = 0; i < IQS7211_NUM_RETRIES; i++) {
+               ret = iqs7211_force_comms(iqs7211);
+               if (ret < 0)
+                       continue;
+
+               ret = i2c_master_send(client, msg_buf, msg_len);
+               if (ret < msg_len) {
+                       /* a short write is reported as an I/O error */
+                       if (ret >= 0)
+                               ret = -EIO;
+
+                       msleep(IQS7211_COMMS_RETRY_MS);
+                       continue;
+               }
+
+               ret = 0;
+               break;
+       }
+
+       kfree(msg_buf);
+
+       iqs7211_irq_wait();
+
+       if (ret < 0)
+               dev_err(&client->dev,
+                       "Failed to write to address 0x%02X: %d\n", reg, ret);
+
+       return ret;
+}
+
+/*
+ * Write a single 16-bit value to register reg, converting it to the
+ * little-endian byte order used on the bus.
+ */
+static int iqs7211_write_word(struct iqs7211_private *iqs7211, u8 reg, u16 val)
+{
+       __le16 le_val;
+
+       le_val = cpu_to_le16(val);
+
+       return iqs7211_write_burst(iqs7211, reg, &le_val, sizeof(le_val));
+}
+
+/*
+ * Establish communication with the device and cache its static data.
+ *
+ * The steps are: pick the communication mode to assume right after reset
+ * from the "azoteq,forced-comms" and "azoteq,forced-comms-default"
+ * properties, reset the device, read and verify the product number, switch
+ * the device to the requested communication mode, read the export file and
+ * trackpad configuration, and finally derive the event mask that later
+ * initialization writes back.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int iqs7211_start_comms(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       bool forced_comms;
+       unsigned int val;
+       u16 comms_setup;
+       int error;
+
+       /*
+        * Until forced communication can be enabled, the host must wait for a
+        * communication window each time it intends to elicit a response from
+        * the device.
+        *
+        * Forced communication is not necessary, however, if the host adapter
+        * can support clock stretching. In that case, the device freely clock
+        * stretches until all pending conversions are complete.
+        */
+       forced_comms = device_property_present(&client->dev,
+                                              "azoteq,forced-comms");
+
+       /*
+        * The optional default describes the mode the device powers up in;
+        * without it, the driver waits for communication windows. Whenever
+        * the default disagrees with the requested mode, the driver also
+        * waits until the mode has been reprogrammed below.
+        */
+       error = device_property_read_u32(&client->dev,
+                                        "azoteq,forced-comms-default", &val);
+       if (error == -EINVAL) {
+               iqs7211->comms_init = IQS7211_COMMS_MODE_WAIT;
+       } else if (error) {
+               dev_err(&client->dev,
+                       "Failed to read default communication mode: %d\n",
+                       error);
+               return error;
+       } else if (val) {
+               iqs7211->comms_init = forced_comms ? IQS7211_COMMS_MODE_FORCE
+                                                  : IQS7211_COMMS_MODE_WAIT;
+       } else {
+               iqs7211->comms_init = forced_comms ? IQS7211_COMMS_MODE_WAIT
+                                                  : IQS7211_COMMS_MODE_FREE;
+       }
+
+       iqs7211->comms_mode = iqs7211->comms_init;
+
+       error = iqs7211_hard_reset(iqs7211);
+       if (error) {
+               dev_err(&client->dev, "Failed to reset device: %d\n", error);
+               return error;
+       }
+
+       error = iqs7211_read_burst(iqs7211, IQS7211_PROD_NUM,
+                                  &iqs7211->ver_info,
+                                  sizeof(iqs7211->ver_info));
+       if (error)
+               return error;
+
+       if (le16_to_cpu(iqs7211->ver_info.prod_num) != dev_desc->prod_num) {
+               dev_err(&client->dev, "Invalid product number: %u\n",
+                       le16_to_cpu(iqs7211->ver_info.prod_num));
+               return -EINVAL;
+       }
+
+       error = iqs7211_read_word(iqs7211, dev_desc->sys_ctrl + 1,
+                                 &comms_setup);
+       if (error)
+               return error;
+
+       if (forced_comms)
+               comms_setup |= dev_desc->comms_req;
+       else
+               comms_setup &= ~dev_desc->comms_req;
+
+       /* comms_end is set only for the reads that follow, then dropped */
+       error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl + 1,
+                                  comms_setup | dev_desc->comms_end);
+       if (error)
+               return error;
+
+       if (forced_comms)
+               iqs7211->comms_mode = IQS7211_COMMS_MODE_FORCE;
+       else
+               iqs7211->comms_mode = IQS7211_COMMS_MODE_FREE;
+
+       error = iqs7211_read_burst(iqs7211, dev_desc->exp_file,
+                                  iqs7211->exp_file,
+                                  sizeof(iqs7211->exp_file));
+       if (error)
+               return error;
+
+       error = iqs7211_read_burst(iqs7211, dev_desc->tp_config,
+                                  &iqs7211->tp_config,
+                                  sizeof(iqs7211->tp_config));
+       if (error)
+               return error;
+
+       error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl + 1,
+                                  comms_setup);
+       if (error)
+               return error;
+
+       /* keep the device's other settings, replace only the event bits */
+       iqs7211->event_mask = comms_setup & ~IQS7211_EVENT_MASK_ALL;
+       iqs7211->event_mask |= (IQS7211_EVENT_MASK_ATI | IQS7211_EVENT_MODE);
+
+       return 0;
+}
+
+/*
+ * Program the device with everything collected during probing.
+ *
+ * In order: acknowledge reset and write the event mask, update the
+ * communication mode to match, apply every queued register field (using a
+ * read-modify-write for fields that do not cover the whole word), write
+ * the trackpad configuration and, if a cycle allocation is present, the
+ * Rx/Tx map and cycle allocation, and finally write ati_start to the
+ * system control register.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int iqs7211_init_device(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct iqs7211_reg_field_desc *reg_field;
+       __le16 sys_ctrl[] = {
+               cpu_to_le16(dev_desc->ack_reset),
+               cpu_to_le16(iqs7211->event_mask),
+       };
+       int error, i;
+
+       /*
+        * Acknowledge reset before writing any registers in case the device
+        * suffers a spurious reset during initialization. The communication
+        * mode is configured at this time as well.
+        */
+       error = iqs7211_write_burst(iqs7211, dev_desc->sys_ctrl, sys_ctrl,
+                                   sizeof(sys_ctrl));
+       if (error)
+               return error;
+
+       if (iqs7211->event_mask & dev_desc->comms_req)
+               iqs7211->comms_mode = IQS7211_COMMS_MODE_FORCE;
+       else
+               iqs7211->comms_mode = IQS7211_COMMS_MODE_FREE;
+
+       /*
+        * Take advantage of the stop-bit disable function, if available, to
+        * save the trouble of having to reopen a communication window after
+        * each read or write.
+        */
+       error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl + 1,
+                                  iqs7211->event_mask | dev_desc->comms_end);
+       if (error)
+               return error;
+
+       list_for_each_entry(reg_field, &iqs7211->reg_field_head, list) {
+               u16 new_val = reg_field->val;
+
+               /* partial fields must preserve the register's other bits */
+               if (reg_field->mask < U16_MAX) {
+                       u16 old_val;
+
+                       error = iqs7211_read_word(iqs7211, reg_field->addr,
+                                                 &old_val);
+                       if (error)
+                               return error;
+
+                       new_val = old_val & ~reg_field->mask;
+                       new_val |= reg_field->val;
+
+                       /* skip the write if nothing would change */
+                       if (new_val == old_val)
+                               continue;
+               }
+
+               error = iqs7211_write_word(iqs7211, reg_field->addr, new_val);
+               if (error)
+                       return error;
+       }
+
+       error = iqs7211_write_burst(iqs7211, dev_desc->tp_config,
+                                   &iqs7211->tp_config,
+                                   sizeof(iqs7211->tp_config));
+       if (error)
+               return error;
+
+       /* a nonzero first byte means a cycle allocation was populated */
+       if (**iqs7211->cycle_alloc) {
+               error = iqs7211_write_burst(iqs7211, dev_desc->rx_tx_map,
+                                           &iqs7211->rx_tx_map,
+                                           dev_desc->num_ctx);
+               if (error)
+                       return error;
+
+               /*
+                * Iterate by element count rather than byte size so the
+                * bound stays correct regardless of the element type.
+                */
+               for (i = 0; i < ARRAY_SIZE(dev_desc->cycle_limit); i++) {
+                       error = iqs7211_write_burst(iqs7211,
+                                                   dev_desc->cycle_alloc[i],
+                                                   iqs7211->cycle_alloc[i],
+                                                   dev_desc->cycle_limit[i] * 3);
+                       if (error)
+                               return error;
+               }
+       }
+
+       /*
+        * Reuse the buffer: the first word now carries ati_start while the
+        * second word still holds the event mask, this time without the
+        * comms_end bits that were set above.
+        */
+       *sys_ctrl = cpu_to_le16(iqs7211->ati_start);
+
+       return iqs7211_write_burst(iqs7211, dev_desc->sys_ctrl, sys_ctrl,
+                                  sizeof(sys_ctrl));
+}
+
+/*
+ * Queue a register field to be applied during device initialization.
+ *
+ * A field whose address is zero is silently ignored. If a field for the
+ * same address is already queued, the new mask and value are OR-ed into
+ * it; otherwise a device-managed entry is allocated and added to the list.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int iqs7211_add_field(struct iqs7211_private *iqs7211,
+                            struct iqs7211_reg_field_desc new_field)
+{
+       struct iqs7211_reg_field_desc *entry;
+
+       if (!new_field.addr)
+               return 0;
+
+       list_for_each_entry(entry, &iqs7211->reg_field_head, list) {
+               if (entry->addr == new_field.addr) {
+                       entry->mask |= new_field.mask;
+                       entry->val |= new_field.val;
+                       return 0;
+               }
+       }
+
+       entry = devm_kzalloc(&iqs7211->client->dev, sizeof(*entry),
+                            GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       entry->addr = new_field.addr;
+       entry->mask = new_field.mask;
+       entry->val = new_field.val;
+
+       list_add(&entry->list, &iqs7211->reg_field_head);
+
+       return 0;
+}
+
+/*
+ * Translate the properties of one firmware node into queued register
+ * fields.
+ *
+ * Every entry of iqs7211_props that matches reg_key and has a nonzero
+ * register address for this register group and device is looked up in
+ * reg_grp_node. Properties that are absent are skipped; values outside the
+ * permitted range are rejected with -EINVAL.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int iqs7211_parse_props(struct iqs7211_private *iqs7211,
+                              struct fwnode_handle *reg_grp_node,
+                              enum iqs7211_reg_grp_id reg_grp,
+                              enum iqs7211_reg_key_id reg_key)
+{
+       struct i2c_client *client = iqs7211->client;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(iqs7211_props); i++) {
+               const char *name = iqs7211_props[i].name;
+               /* second index is this device's position in iqs7211_devs */
+               u8 reg_addr = iqs7211_props[i].reg_addr[reg_grp]
+                                                      [iqs7211->dev_desc -
+                                                       iqs7211_devs];
+               int reg_shift = iqs7211_props[i].reg_shift;
+               /* unset table members fall back to a full 16-bit field */
+               int reg_width = iqs7211_props[i].reg_width ? : 16;
+               int val_pitch = iqs7211_props[i].val_pitch ? : 1;
+               int val_min = iqs7211_props[i].val_min;
+               int val_max = iqs7211_props[i].val_max;
+               const char *label = iqs7211_props[i].label ? : name;
+               struct iqs7211_reg_field_desc reg_field;
+               unsigned int val;
+               int error;
+
+               if (iqs7211_props[i].reg_key != reg_key)
+                       continue;
+
+               /* a zero address means the property does not apply here */
+               if (!reg_addr)
+                       continue;
+
+               error = fwnode_property_read_u32(reg_grp_node, name, &val);
+               if (error == -EINVAL) {
+                       continue;
+               } else if (error) {
+                       dev_err(&client->dev, "Failed to read %s %s: %d\n",
+                               fwnode_get_name(reg_grp_node), label, error);
+                       return error;
+               }
+
+               /* default upper limit: largest value the field can hold */
+               if (!val_max)
+                       val_max = GENMASK(reg_width - 1, 0) * val_pitch;
+
+               if (val < val_min || val > val_max) {
+                       dev_err(&client->dev, "Invalid %s: %u\n", label, val);
+                       return -EINVAL;
+               }
+
+               reg_field.addr = reg_addr;
+               reg_field.mask = GENMASK(reg_shift + reg_width - 1, reg_shift);
+               /* scale the property value down to register units */
+               reg_field.val = val / val_pitch << reg_shift;
+
+               error = iqs7211_add_field(iqs7211, reg_field);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+/*
+ * Parse one event node: its generic properties, the optional gesture angle
+ * (axial gestures only) and the optional key code.
+ *
+ * A missing "azoteq,gesture-angle" or "linux,code" property is not an
+ * error; *event_code is only written if "linux,code" is present.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int iqs7211_parse_event(struct iqs7211_private *iqs7211,
+                              struct fwnode_handle *event_node,
+                              enum iqs7211_reg_grp_id reg_grp,
+                              enum iqs7211_reg_key_id reg_key,
+                              unsigned int *event_code)
+{
+       struct i2c_client *client = iqs7211->client;
+       struct iqs7211_reg_field_desc angle_field;
+       bool is_axial = reg_key == IQS7211_REG_KEY_AXIAL_X ||
+                       reg_key == IQS7211_REG_KEY_AXIAL_Y;
+       unsigned int angle;
+       int ret;
+
+       ret = iqs7211_parse_props(iqs7211, event_node, reg_grp, reg_key);
+       if (ret)
+               return ret;
+
+       if (is_axial) {
+               ret = fwnode_property_read_u32(event_node,
+                                              "azoteq,gesture-angle", &angle);
+               if (ret && ret != -EINVAL) {
+                       dev_err(&client->dev,
+                               "Failed to read %s gesture angle: %d\n",
+                               fwnode_get_name(event_node), ret);
+                       return ret;
+               }
+
+               if (!ret) {
+                       /* the property is an index into the angle table */
+                       if (angle >= ARRAY_SIZE(iqs7211_gesture_angle)) {
+                               dev_err(&client->dev,
+                                       "Invalid %s gesture angle: %u\n",
+                                       fwnode_get_name(event_node), angle);
+                               return -EINVAL;
+                       }
+
+                       angle_field.addr = iqs7211->dev_desc->gesture_angle;
+                       angle_field.mask = U8_MAX;
+                       angle_field.val = iqs7211_gesture_angle[angle];
+
+                       ret = iqs7211_add_field(iqs7211, angle_field);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       ret = fwnode_property_read_u32(event_node, "linux,code", event_code);
+       if (!ret || ret == -EINVAL)
+               return 0;
+
+       dev_err(&client->dev, "Failed to read %s code: %d\n",
+               fwnode_get_name(event_node), ret);
+
+       return ret;
+}
+
+/*
+ * Derive the assignment of channels to sensing cycle slots and pack it into
+ * iqs7211->cycle_alloc.
+ *
+ * If "azoteq,channel-select" is absent, channels are assigned automatically
+ * from the CRx/CTx counts and rx_tx_map already stored in iqs7211. Otherwise
+ * the property supplies the raw assignment, which is validated for range and
+ * duplicates. Slots that are never assigned keep the value 255 (unused).
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int iqs7211_parse_cycles(struct iqs7211_private *iqs7211,
+                               struct fwnode_handle *tp_node)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       int num_cycles = dev_desc->cycle_limit[0] + dev_desc->cycle_limit[1];
+       int error, count, i, j, k, cycle_start;
+       unsigned int cycle_alloc[IQS7211_MAX_CYCLES][2];
+       u8 total_rx = iqs7211->tp_config.total_rx;
+       u8 total_tx = iqs7211->tp_config.total_tx;
+
+       /* Mark every slot of every cycle as unused before assigning any. */
+       for (i = 0; i < IQS7211_MAX_CYCLES * 2; i++)
+               *(cycle_alloc[0] + i) = U8_MAX;
+
+       count = fwnode_property_count_u32(tp_node, "azoteq,channel-select");
+       if (count == -EINVAL) {
+               /*
+                * Assign each sensing cycle's slots (0 and 1) to a channel,
+                * defined as the intersection between two CRx and CTx pins.
+                * A channel assignment of 255 means the slot is unused.
+                */
+               for (i = 0, cycle_start = 0; i < total_tx; i++) {
+                       int cycle_stop = 0;
+
+                       for (j = 0; j < total_rx; j++) {
+                               /*
+                                * Channels formed by CRx0-3 and CRx4-7 are
+                                * bound to slots 0 and 1, respectively.
+                                */
+                               int slot = iqs7211->rx_tx_map[j] < 4 ? 0 : 1;
+                               int chan = i * total_rx + j;
+
+                               /* Take the first free cycle for this slot. */
+                               for (k = cycle_start; k < num_cycles; k++) {
+                                       if (cycle_alloc[k][slot] < U8_MAX)
+                                               continue;
+
+                                       cycle_alloc[k][slot] = chan;
+                                       break;
+                               }
+
+                               if (k < num_cycles) {
+                                       cycle_stop = max(k, cycle_stop);
+                                       continue;
+                               }
+
+                               dev_err(&client->dev,
+                                       "Insufficient number of cycles\n");
+                               return -EINVAL;
+                       }
+
+                       /*
+                        * Sensing cycles cannot straddle more than one CTx
+                        * pin. As such, the next row's starting cycle must
+                        * be greater than the previous row's highest cycle.
+                        */
+                       cycle_start = cycle_stop + 1;
+               }
+       } else if (count < 0) {
+               dev_err(&client->dev, "Failed to count channels: %d\n", count);
+               return count;
+       } else if (count > num_cycles * 2) {
+               dev_err(&client->dev, "Insufficient number of cycles\n");
+               return -EINVAL;
+       } else if (count > 0) {
+               error = fwnode_property_read_u32_array(tp_node,
+                                                      "azoteq,channel-select",
+                                                      cycle_alloc[0], count);
+               if (error) {
+                       dev_err(&client->dev, "Failed to read channels: %d\n",
+                               error);
+                       return error;
+               }
+
+               for (i = 0; i < count; i++) {
+                       int chan = *(cycle_alloc[0] + i);
+
+                       /* Unused slots may repeat and need no validation. */
+                       if (chan == U8_MAX)
+                               continue;
+
+                       if (chan >= total_rx * total_tx) {
+                               dev_err(&client->dev, "Invalid channel: %d\n",
+                                       chan);
+                               return -EINVAL;
+                       }
+
+                       for (j = 0; j < count; j++) {
+                               if (j == i || *(cycle_alloc[0] + j) != chan)
+                                       continue;
+
+                               dev_err(&client->dev, "Duplicate channel: %d\n",
+                                       chan);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       /*
+        * Once the raw channel assignments have been derived, they must be
+        * packed according to the device's register map.
+        *
+        * The loop runs once per entry of cycle_limit, so it is bounded by
+        * the array's element count rather than its size in bytes.
+        */
+       for (i = 0, cycle_start = 0; i < ARRAY_SIZE(dev_desc->cycle_limit); i++) {
+               int offs = 0;
+
+               for (j = cycle_start;
+                    j < cycle_start + dev_desc->cycle_limit[i]; j++) {
+                       iqs7211->cycle_alloc[i][offs++] = 0x05;
+                       iqs7211->cycle_alloc[i][offs++] = cycle_alloc[j][0];
+                       iqs7211->cycle_alloc[i][offs++] = cycle_alloc[j][1];
+               }
+
+               cycle_start += dev_desc->cycle_limit[i];
+       }
+
+       return 0;
+}
+
+/*
+ * Parse the trackpad node's "azoteq,rx-enable" and "azoteq,tx-enable" pin
+ * lists. CRx pins are stored at the start of iqs7211->rx_tx_map, followed by
+ * the CTx pins; the pin counts are stored in iqs7211->tp_config. The sensing
+ * cycle allocation is then derived by iqs7211_parse_cycles().
+ *
+ * Returns 0 on success, or immediately if "azoteq,rx-enable" is absent, and
+ * a negative errno on failure.
+ */
+static int iqs7211_parse_tp(struct iqs7211_private *iqs7211,
+                           struct fwnode_handle *tp_node)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       /*
+        * Shared by the CRx and CTx reads below.
+        * NOTE(review): assumes IQS7211_MAX_CTX >= IQS7211_NUM_CRX and
+        * dev_desc->num_ctx <= IQS7211_MAX_CTX -- confirm against the
+        * definitions.
+        */
+       unsigned int pins[IQS7211_MAX_CTX];
+       int error, count, i, j;
+
+       count = fwnode_property_count_u32(tp_node, "azoteq,rx-enable");
+       if (count == -EINVAL) {
+               return 0;
+       } else if (count < 0) {
+               dev_err(&client->dev, "Failed to count CRx pins: %d\n", count);
+               return count;
+       } else if (count > IQS7211_NUM_CRX) {
+               dev_err(&client->dev, "Invalid number of CRx pins\n");
+               return -EINVAL;
+       }
+
+       error = fwnode_property_read_u32_array(tp_node, "azoteq,rx-enable",
+                                              pins, count);
+       if (error) {
+               dev_err(&client->dev, "Failed to read CRx pins: %d\n", error);
+               return error;
+       }
+
+       for (i = 0; i < count; i++) {
+               if (pins[i] >= IQS7211_NUM_CRX) {
+                       dev_err(&client->dev, "Invalid CRx pin: %u\n", pins[i]);
+                       return -EINVAL;
+               }
+
+               iqs7211->rx_tx_map[i] = pins[i];
+       }
+
+       iqs7211->tp_config.total_rx = count;
+
+       /* Unlike the CRx list, the CTx list is required once CRx is given. */
+       count = fwnode_property_count_u32(tp_node, "azoteq,tx-enable");
+       if (count < 0) {
+               dev_err(&client->dev, "Failed to count CTx pins: %d\n", count);
+               return count;
+       } else if (count > dev_desc->num_ctx) {
+               dev_err(&client->dev, "Invalid number of CTx pins\n");
+               return -EINVAL;
+       }
+
+       error = fwnode_property_read_u32_array(tp_node, "azoteq,tx-enable",
+                                              pins, count);
+       if (error) {
+               dev_err(&client->dev, "Failed to read CTx pins: %d\n", error);
+               return error;
+       }
+
+       for (i = 0; i < count; i++) {
+               if (pins[i] >= dev_desc->num_ctx) {
+                       dev_err(&client->dev, "Invalid CTx pin: %u\n", pins[i]);
+                       return -EINVAL;
+               }
+
+               /* Reject a CTx pin number already claimed as a CRx pin. */
+               for (j = 0; j < iqs7211->tp_config.total_rx; j++) {
+                       if (iqs7211->rx_tx_map[j] != pins[i])
+                               continue;
+
+                       dev_err(&client->dev, "Conflicting CTx pin: %u\n",
+                               pins[i]);
+                       return -EINVAL;
+               }
+
+               iqs7211->rx_tx_map[iqs7211->tp_config.total_rx + i] = pins[i];
+       }
+
+       iqs7211->tp_config.total_tx = count;
+
+       return iqs7211_parse_cycles(iqs7211, tp_node);
+}
+
+/*
+ * Parse the ALP node's optional "azoteq,rx-enable" and "azoteq,tx-enable"
+ * pin lists. Each list that is present is converted into a bitmask (one bit
+ * per listed pin) and queued with iqs7211_add_field(): CRx pins at
+ * dev_desc->alp_config and CTx pins at dev_desc->alp_config + 1.
+ *
+ * Returns 0 on success or a negative errno on failure. An absent list
+ * (-EINVAL from the count) is skipped without error.
+ */
+static int iqs7211_parse_alp(struct iqs7211_private *iqs7211,
+                            struct fwnode_handle *alp_node)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       struct iqs7211_reg_field_desc reg_field;
+       int error, count, i;
+
+       count = fwnode_property_count_u32(alp_node, "azoteq,rx-enable");
+       if (count < 0 && count != -EINVAL) {
+               dev_err(&client->dev, "Failed to count CRx pins: %d\n", count);
+               return count;
+       } else if (count > IQS7211_NUM_CRX) {
+               dev_err(&client->dev, "Invalid number of CRx pins\n");
+               return -EINVAL;
+       } else if (count >= 0) {
+               unsigned int pins[IQS7211_NUM_CRX];
+
+               error = fwnode_property_read_u32_array(alp_node,
+                                                      "azoteq,rx-enable",
+                                                      pins, count);
+               if (error) {
+                       dev_err(&client->dev, "Failed to read CRx pins: %d\n",
+                               error);
+                       return error;
+               }
+
+               reg_field.addr = dev_desc->alp_config;
+               reg_field.mask = GENMASK(IQS7211_NUM_CRX - 1, 0);
+               reg_field.val = 0;
+
+               for (i = 0; i < count; i++) {
+                       /* Pins below min_crx_alp are rejected for ALP use. */
+                       if (pins[i] < dev_desc->min_crx_alp ||
+                           pins[i] >= IQS7211_NUM_CRX) {
+                               dev_err(&client->dev, "Invalid CRx pin: %u\n",
+                                       pins[i]);
+                               return -EINVAL;
+                       }
+
+                       reg_field.val |= BIT(pins[i]);
+               }
+
+               error = iqs7211_add_field(iqs7211, reg_field);
+               if (error)
+                       return error;
+       }
+
+       count = fwnode_property_count_u32(alp_node, "azoteq,tx-enable");
+       if (count < 0 && count != -EINVAL) {
+               dev_err(&client->dev, "Failed to count CTx pins: %d\n", count);
+               return count;
+       } else if (count > dev_desc->num_ctx) {
+               dev_err(&client->dev, "Invalid number of CTx pins\n");
+               return -EINVAL;
+       } else if (count >= 0) {
+               /*
+                * NOTE(review): assumes dev_desc->num_ctx never exceeds
+                * IQS7211_MAX_CTX -- confirm against the device table.
+                */
+               unsigned int pins[IQS7211_MAX_CTX];
+
+               error = fwnode_property_read_u32_array(alp_node,
+                                                      "azoteq,tx-enable",
+                                                      pins, count);
+               if (error) {
+                       dev_err(&client->dev, "Failed to read CTx pins: %d\n",
+                               error);
+                       return error;
+               }
+
+               reg_field.addr = dev_desc->alp_config + 1;
+               reg_field.mask = GENMASK(dev_desc->num_ctx - 1, 0);
+               reg_field.val = 0;
+
+               for (i = 0; i < count; i++) {
+                       if (pins[i] >= dev_desc->num_ctx) {
+                               dev_err(&client->dev, "Invalid CTx pin: %u\n",
+                                       pins[i]);
+                               return -EINVAL;
+                       }
+
+                       reg_field.val |= BIT(pins[i]);
+               }
+
+               error = iqs7211_add_field(iqs7211, reg_field);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+/*
+ * Optional extra parsers, indexed by register group. Only the trackpad and
+ * ALP groups have one; the remaining entries are left NULL.
+ */
+static int (*iqs7211_parse_extra[IQS7211_NUM_REG_GRPS])
+                               (struct iqs7211_private *iqs7211,
+                                struct fwnode_handle *reg_grp_node) = {
+       [IQS7211_REG_GRP_TP] = iqs7211_parse_tp,
+       [IQS7211_REG_GRP_ALP] = iqs7211_parse_alp,
+};
+
+/*
+ * Parse one register group node: its generic properties, any group-specific
+ * extras, and every keypad event that belongs to the group. The enable bits
+ * of all events in the group form the field mask; only events whose node is
+ * present and parses successfully have their enable bit set in the value.
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int iqs7211_parse_reg_grp(struct iqs7211_private *iqs7211,
+                                struct fwnode_handle *reg_grp_node,
+                                enum iqs7211_reg_grp_id reg_grp)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct iqs7211_reg_field_desc reg_field;
+       int error, i;
+
+       error = iqs7211_parse_props(iqs7211, reg_grp_node, reg_grp,
+                                   IQS7211_REG_KEY_NONE);
+       if (error)
+               return error;
+
+       if (iqs7211_parse_extra[reg_grp]) {
+               error = iqs7211_parse_extra[reg_grp](iqs7211, reg_grp_node);
+               if (error)
+                       return error;
+       }
+
+       iqs7211->ati_start |= dev_desc->ati_start[reg_grp];
+
+       reg_field.addr = dev_desc->kp_enable[reg_grp];
+       reg_field.mask = 0;
+       reg_field.val = 0;
+
+       for (i = 0; i < dev_desc->num_kp_events; i++) {
+               const char *event_name = dev_desc->kp_events[i].name;
+               struct fwnode_handle *event_node;
+
+               if (dev_desc->kp_events[i].reg_grp != reg_grp)
+                       continue;
+
+               reg_field.mask |= dev_desc->kp_events[i].enable;
+
+               /*
+                * An unnamed event is described by the group node itself; an
+                * extra reference is taken so that both cases can be released
+                * with the same fwnode_handle_put() below.
+                */
+               if (event_name)
+                       event_node = fwnode_get_named_child_node(reg_grp_node,
+                                                                event_name);
+               else
+                       event_node = fwnode_handle_get(reg_grp_node);
+
+               if (!event_node)
+                       continue;
+
+               error = iqs7211_parse_event(iqs7211, event_node,
+                                           dev_desc->kp_events[i].reg_grp,
+                                           dev_desc->kp_events[i].reg_key,
+                                           &iqs7211->kp_code[i]);
+               fwnode_handle_put(event_node);
+               if (error)
+                       return error;
+
+               reg_field.val |= dev_desc->kp_events[i].enable;
+
+               iqs7211->event_mask |= iqs7211_reg_grp_masks[reg_grp];
+       }
+
+       return iqs7211_add_field(iqs7211, reg_field);
+}
+
+/*
+ * Allocate and register the keypad input device, advertising every nonzero
+ * key code collected in iqs7211->kp_code. Nothing is allocated when no event
+ * has a key code.
+ *
+ * Returns 0 on success (or when there is nothing to register) and a negative
+ * errno on failure.
+ */
+static int iqs7211_register_kp(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct device *dev = &iqs7211->client->dev;
+       struct input_dev *idev;
+       bool code_found = false;
+       int ret, n;
+
+       for (n = 0; n < dev_desc->num_kp_events; n++) {
+               if (iqs7211->kp_code[n]) {
+                       code_found = true;
+                       break;
+               }
+       }
+
+       if (!code_found)
+               return 0;
+
+       idev = devm_input_allocate_device(dev);
+       if (!idev)
+               return -ENOMEM;
+
+       iqs7211->kp_idev = idev;
+
+       idev->name = dev_desc->kp_name;
+       idev->id.bustype = BUS_I2C;
+
+       for (n = 0; n < dev_desc->num_kp_events; n++) {
+               unsigned int code = iqs7211->kp_code[n];
+
+               if (code)
+                       input_set_capability(idev, EV_KEY, code);
+       }
+
+       ret = input_register_device(idev);
+       if (ret)
+               dev_err(dev, "Failed to register %s: %d\n", idev->name, ret);
+
+       return ret;
+}
+
+/*
+ * Allocate and register the multitouch trackpad input device, provided that
+ * "azoteq,num-contacts" is present and nonzero.
+ *
+ * The axis maxima currently held in iqs7211->tp_config seed the input
+ * device, may then be overridden by the generic touchscreen properties, and
+ * are finally written back to tp_config.
+ *
+ * Returns 0 on success (or when no trackpad is to be registered) and a
+ * negative errno on failure.
+ */
+static int iqs7211_register_tp(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct touchscreen_properties *prop = &iqs7211->prop;
+       struct input_dev *tp_idev = iqs7211->tp_idev;
+       struct i2c_client *client = iqs7211->client;
+       int error;
+
+       error = device_property_read_u32(&client->dev, "azoteq,num-contacts",
+                                        &iqs7211->num_contacts);
+       if (error == -EINVAL) {
+               return 0;
+       } else if (error) {
+               dev_err(&client->dev, "Failed to read number of contacts: %d\n",
+                       error);
+               return error;
+       } else if (iqs7211->num_contacts > IQS7211_MAX_CONTACTS) {
+               dev_err(&client->dev, "Invalid number of contacts: %u\n",
+                       iqs7211->num_contacts);
+               return -EINVAL;
+       }
+
+       /* The device is configured for at least one contact even if zero. */
+       iqs7211->tp_config.num_contacts = iqs7211->num_contacts ? : 1;
+
+       if (!iqs7211->num_contacts)
+               return 0;
+
+       iqs7211->event_mask |= IQS7211_EVENT_MASK_MOVE;
+
+       tp_idev = devm_input_allocate_device(&client->dev);
+       if (!tp_idev)
+               return -ENOMEM;
+
+       iqs7211->tp_idev = tp_idev;
+
+       tp_idev->name = dev_desc->tp_name;
+       tp_idev->id.bustype = BUS_I2C;
+
+       input_set_abs_params(tp_idev, ABS_MT_POSITION_X,
+                            0, le16_to_cpu(iqs7211->tp_config.max_x), 0, 0);
+
+       input_set_abs_params(tp_idev, ABS_MT_POSITION_Y,
+                            0, le16_to_cpu(iqs7211->tp_config.max_y), 0, 0);
+
+       input_set_abs_params(tp_idev, ABS_MT_PRESSURE, 0, U16_MAX, 0, 0);
+
+       touchscreen_parse_properties(tp_idev, true, prop);
+
+       /*
+        * The device reserves 0xFFFF for coordinates that correspond to slots
+        * which are not in a state of touch.
+        */
+       if (prop->max_x >= U16_MAX || prop->max_y >= U16_MAX) {
+               dev_err(&client->dev, "Invalid trackpad size: %u*%u\n",
+                       prop->max_x, prop->max_y);
+               return -EINVAL;
+       }
+
+       /* Write the (possibly overridden) maxima back for the device. */
+       iqs7211->tp_config.max_x = cpu_to_le16(prop->max_x);
+       iqs7211->tp_config.max_y = cpu_to_le16(prop->max_y);
+
+       error = input_mt_init_slots(tp_idev, iqs7211->num_contacts,
+                                   INPUT_MT_DIRECT);
+       if (error) {
+               dev_err(&client->dev, "Failed to initialize slots: %d\n",
+                       error);
+               return error;
+       }
+
+       error = input_register_device(tp_idev);
+       if (error)
+               dev_err(&client->dev, "Failed to register %s: %d\n",
+                       tp_idev->name, error);
+
+       return error;
+}
+
+/*
+ * Read the device's status in one burst and translate it into input events:
+ * multitouch contacts on the trackpad input device, and ALP, button and
+ * gesture key events on the keypad input device (if one was registered).
+ *
+ * If the status shows a device reset, the device is re-initialized instead;
+ * if it shows an ATI error, the error is logged and the report is dropped.
+ *
+ * Returns 0 on success, or the error returned by the burst read or by the
+ * re-initialization.
+ */
+static int iqs7211_report(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       struct iqs7211_touch_data *touch_data;
+       u16 info_flags, charge_mode, gesture_flags;
+       /*
+        * NOTE(review): assumes 12 words always cover contact_offs words plus
+        * num_contacts touch records -- confirm against the device table and
+        * IQS7211_MAX_CONTACTS.
+        */
+       __le16 status[12];
+       int error, i;
+
+       error = iqs7211_read_burst(iqs7211, dev_desc->sys_stat, status,
+                                  dev_desc->contact_offs * sizeof(__le16) +
+                                  iqs7211->num_contacts * sizeof(*touch_data));
+       if (error)
+               return error;
+
+       info_flags = le16_to_cpu(status[dev_desc->info_offs]);
+
+       if (info_flags & dev_desc->show_reset) {
+               dev_err(&client->dev, "Unexpected device reset\n");
+
+               /*
+                * The device may or may not expect forced communication after
+                * it exits hardware reset, so the corresponding state machine
+                * must be reset as well.
+                */
+               iqs7211->comms_mode = iqs7211->comms_init;
+
+               return iqs7211_init_device(iqs7211);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(dev_desc->ati_error); i++) {
+               if (!(info_flags & dev_desc->ati_error[i]))
+                       continue;
+
+               dev_err(&client->dev, "Unexpected %s ATI error\n",
+                       iqs7211_reg_grp_names[i]);
+               return 0;
+       }
+
+       for (i = 0; i < iqs7211->num_contacts; i++) {
+               u16 pressure;
+
+               /* Touch records follow the status words back to back. */
+               touch_data = (struct iqs7211_touch_data *)
+                            &status[dev_desc->contact_offs] + i;
+               pressure = le16_to_cpu(touch_data->pressure);
+
+               /* A slot is active only while its pressure is nonzero. */
+               input_mt_slot(iqs7211->tp_idev, i);
+               if (input_mt_report_slot_state(iqs7211->tp_idev, MT_TOOL_FINGER,
+                                              pressure != 0)) {
+                       touchscreen_report_pos(iqs7211->tp_idev, &iqs7211->prop,
+                                              le16_to_cpu(touch_data->abs_x),
+                                              le16_to_cpu(touch_data->abs_y),
+                                              true);
+                       input_report_abs(iqs7211->tp_idev, ABS_MT_PRESSURE,
+                                        pressure);
+               }
+       }
+
+       if (iqs7211->num_contacts) {
+               input_mt_sync_frame(iqs7211->tp_idev);
+               input_sync(iqs7211->tp_idev);
+       }
+
+       if (!iqs7211->kp_idev)
+               return 0;
+
+       /* Extract the three-bit charging mode field from the info flags. */
+       charge_mode = info_flags & GENMASK(dev_desc->charge_shift + 2,
+                                          dev_desc->charge_shift);
+       charge_mode >>= dev_desc->charge_shift;
+
+       /*
+        * A charging mode higher than 2 (idle mode) indicates the device last
+        * operated in low-power mode and intends to express an ALP event.
+        */
+       /* The first keypad event and key code are used for the ALP event. */
+       if (info_flags & dev_desc->kp_events->mask && charge_mode > 2) {
+               input_report_key(iqs7211->kp_idev, *iqs7211->kp_code, 1);
+               input_sync(iqs7211->kp_idev);
+
+               input_report_key(iqs7211->kp_idev, *iqs7211->kp_code, 0);
+       }
+
+       /* Button events mirror their status bit directly. */
+       for (i = 0; i < dev_desc->num_kp_events; i++) {
+               if (dev_desc->kp_events[i].reg_grp != IQS7211_REG_GRP_BTN)
+                       continue;
+
+               input_report_key(iqs7211->kp_idev, iqs7211->kp_code[i],
+                                info_flags & dev_desc->kp_events[i].mask);
+       }
+
+       gesture_flags = le16_to_cpu(status[dev_desc->gesture_offs]);
+
+       for (i = 0; i < dev_desc->num_kp_events; i++) {
+               enum iqs7211_reg_key_id reg_key = dev_desc->kp_events[i].reg_key;
+               u16 mask = dev_desc->kp_events[i].mask;
+
+               if (dev_desc->kp_events[i].reg_grp != IQS7211_REG_GRP_TP)
+                       continue;
+
+               /* Report a gesture only when its state differs from the cache. */
+               if ((gesture_flags ^ iqs7211->gesture_cache) & mask)
+                       input_report_key(iqs7211->kp_idev, iqs7211->kp_code[i],
+                                        gesture_flags & mask);
+
+               iqs7211->gesture_cache &= ~mask;
+
+               /*
+                * Hold and palm gestures persist while the contact remains in
+                * place; all others are momentary and hence are followed by a
+                * complementary release event.
+                */
+               if (reg_key == IQS7211_REG_KEY_HOLD ||
+                   reg_key == IQS7211_REG_KEY_PALM) {
+                       iqs7211->gesture_cache |= gesture_flags & mask;
+                       gesture_flags &= ~mask;
+               }
+       }
+
+       /* Any bits still set are momentary gestures that need a release. */
+       if (gesture_flags) {
+               input_sync(iqs7211->kp_idev);
+
+               for (i = 0; i < dev_desc->num_kp_events; i++)
+                       if (dev_desc->kp_events[i].reg_grp == IQS7211_REG_GRP_TP &&
+                           gesture_flags & dev_desc->kp_events[i].mask)
+                               input_report_key(iqs7211->kp_idev,
+                                                iqs7211->kp_code[i], 0);
+       }
+
+       input_sync(iqs7211->kp_idev);
+
+       return 0;
+}
+
+/*
+ * Threaded interrupt handler: process one status report. A failed report is
+ * signalled to the IRQ core as IRQ_NONE, a successful one as IRQ_HANDLED.
+ */
+static irqreturn_t iqs7211_irq(int irq, void *context)
+{
+       struct iqs7211_private *iqs7211 = context;
+
+       if (iqs7211_report(iqs7211))
+               return IRQ_NONE;
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * System suspend callback: write the device-specific suspend command to the
+ * system control register. Does nothing if the device has no suspend command
+ * or is configured as a wake-up source.
+ *
+ * Returns 0 or the error returned by the register write.
+ */
+static int iqs7211_suspend(struct device *dev)
+{
+       struct iqs7211_private *iqs7211 = dev_get_drvdata(dev);
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       int irq, ret;
+
+       if (!dev_desc->suspend || device_may_wakeup(dev))
+               return 0;
+
+       irq = gpiod_to_irq(iqs7211->irq_gpio);
+
+       /*
+        * Talking to the device over I2C makes it assert its RDY pin if it
+        * is not asserted already, so the interrupt is kept disabled for the
+        * duration of the write to avoid reentrant interrupts.
+        */
+       disable_irq(irq);
+       ret = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl,
+                                dev_desc->suspend);
+       enable_irq(irq);
+
+       return ret;
+}
+
+/*
+ * System resume callback: write the wake-up command (a zero word followed by
+ * the event mask) starting at the system control register. Does nothing if
+ * the device has no suspend command or is configured as a wake-up source.
+ *
+ * Returns 0 or the error returned by the burst write.
+ */
+static int iqs7211_resume(struct device *dev)
+{
+       struct iqs7211_private *iqs7211 = dev_get_drvdata(dev);
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       __le16 wake_cmd[2];
+       int irq, ret;
+
+       if (!dev_desc->suspend || device_may_wakeup(dev))
+               return 0;
+
+       wake_cmd[0] = 0;
+       wake_cmd[1] = cpu_to_le16(iqs7211->event_mask);
+
+       irq = gpiod_to_irq(iqs7211->irq_gpio);
+
+       disable_irq(irq);
+
+       /*
+        * If forced communication is in use, it has to be enabled explicitly
+        * as part of the wake-up command.
+        */
+       ret = iqs7211_write_burst(iqs7211, dev_desc->sys_ctrl, wake_cmd,
+                                 sizeof(wake_cmd));
+
+       enable_irq(irq);
+
+       return ret;
+}
+
+/* System sleep operations built from the suspend/resume callbacks above. */
+static DEFINE_SIMPLE_DEV_PM_OPS(iqs7211_pm, iqs7211_suspend, iqs7211_resume);
+
+/*
+ * sysfs "fw_info" show callback. Prints the cached version information as
+ * "<prod_num>.<patch>.<major>.<minor>:<exp_file[1]>.<exp_file[0]>" followed
+ * by a newline, and returns the number of bytes written to buf.
+ */
+static ssize_t fw_info_show(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       struct iqs7211_private *iqs7211 = dev_get_drvdata(dev);
+       unsigned int prod_num = le16_to_cpu(iqs7211->ver_info.prod_num);
+       unsigned int patch = le32_to_cpu(iqs7211->ver_info.patch);
+       unsigned int major = le16_to_cpu(iqs7211->ver_info.major);
+       unsigned int minor = le16_to_cpu(iqs7211->ver_info.minor);
+
+       return scnprintf(buf, PAGE_SIZE, "%u.%u.%u.%u:%u.%u\n",
+                        prod_num, patch, major, minor,
+                        iqs7211->exp_file[1], iqs7211->exp_file[0]);
+}
+
+/* Read-only "fw_info" device attribute, backed by fw_info_show(). */
+static DEVICE_ATTR_RO(fw_info);
+
+/* NULL-terminated attribute list from which iqs7211_groups is generated. */
+static struct attribute *iqs7211_attrs[] = {
+       &dev_attr_fw_info.attr,
+       NULL
+};
+ATTRIBUTE_GROUPS(iqs7211);
+
+/*
+ * Device tree match table. Each compatible string carries a pointer to the
+ * matching entry of iqs7211_devs[], which probe retrieves as its dev_desc.
+ */
+static const struct of_device_id iqs7211_of_match[] = {
+       {
+               .compatible = "azoteq,iqs7210a",
+               .data = &iqs7211_devs[IQS7210A],
+       },
+       {
+               .compatible = "azoteq,iqs7211a",
+               .data = &iqs7211_devs[IQS7211A],
+       },
+       {
+               .compatible = "azoteq,iqs7211e",
+               .data = &iqs7211_devs[IQS7211E],
+       },
+       { }
+};
+MODULE_DEVICE_TABLE(of, iqs7211_of_match);
+
+/*
+ * Probe an IQS7210A/7211A/E device: allocate driver state, claim the RDY
+ * (and, where separate, reset) GPIO, start communication, parse every
+ * register group from firmware, register the keypad and trackpad input
+ * devices, initialize the device and finally request the RDY interrupt.
+ *
+ * All resources are device-managed. Returns 0 on success or a negative errno
+ * on failure.
+ */
+static int iqs7211_probe(struct i2c_client *client)
+{
+       struct iqs7211_private *iqs7211;
+       enum iqs7211_reg_grp_id reg_grp;
+       unsigned long irq_flags;
+       bool shared_irq;
+       int error, irq;
+
+       iqs7211 = devm_kzalloc(&client->dev, sizeof(*iqs7211), GFP_KERNEL);
+       if (!iqs7211)
+               return -ENOMEM;
+
+       i2c_set_clientdata(client, iqs7211);
+       iqs7211->client = client;
+
+       INIT_LIST_HEAD(&iqs7211->reg_field_head);
+
+       iqs7211->dev_desc = device_get_match_data(&client->dev);
+       if (!iqs7211->dev_desc)
+               return -ENODEV;
+
+       /* Devices with the maximum CTx count share one pin for RDY/MCLR. */
+       shared_irq = iqs7211->dev_desc->num_ctx == IQS7211_MAX_CTX;
+
+       /*
+        * The RDY pin behaves as an interrupt, but must also be polled ahead
+        * of unsolicited I2C communication. As such, it is first opened as a
+        * GPIO and then passed to gpiod_to_irq() to register the interrupt.
+        *
+        * If an extra CTx pin is present, the RDY and MCLR pins are combined
+        * into a single bidirectional pin. In that case, the platform's GPIO
+        * must be configured as an open-drain output.
+        */
+       iqs7211->irq_gpio = devm_gpiod_get(&client->dev, "irq",
+                                          shared_irq ? GPIOD_OUT_LOW
+                                                     : GPIOD_IN);
+       if (IS_ERR(iqs7211->irq_gpio)) {
+               error = PTR_ERR(iqs7211->irq_gpio);
+               dev_err(&client->dev, "Failed to request IRQ GPIO: %d\n",
+                       error);
+               return error;
+       }
+
+       if (shared_irq) {
+               iqs7211->reset_gpio = iqs7211->irq_gpio;
+       } else {
+               /* A dedicated reset GPIO is optional. */
+               iqs7211->reset_gpio = devm_gpiod_get_optional(&client->dev,
+                                                             "reset",
+                                                             GPIOD_OUT_HIGH);
+               if (IS_ERR(iqs7211->reset_gpio)) {
+                       error = PTR_ERR(iqs7211->reset_gpio);
+                       dev_err(&client->dev,
+                               "Failed to request reset GPIO: %d\n", error);
+                       return error;
+               }
+       }
+
+       error = iqs7211_start_comms(iqs7211);
+       if (error)
+               return error;
+
+       for (reg_grp = 0; reg_grp < IQS7211_NUM_REG_GRPS; reg_grp++) {
+               const char *reg_grp_name = iqs7211_reg_grp_names[reg_grp];
+               struct fwnode_handle *reg_grp_node;
+
+               /*
+                * A group without a name is described by the device's own
+                * node; an extra reference is taken so that both cases can be
+                * released with the same fwnode_handle_put() below.
+                */
+               if (reg_grp_name)
+                       reg_grp_node = device_get_named_child_node(&client->dev,
+                                                                  reg_grp_name);
+               else
+                       reg_grp_node = fwnode_handle_get(dev_fwnode(&client->dev));
+
+               if (!reg_grp_node)
+                       continue;
+
+               error = iqs7211_parse_reg_grp(iqs7211, reg_grp_node, reg_grp);
+               fwnode_handle_put(reg_grp_node);
+               if (error)
+                       return error;
+       }
+
+       error = iqs7211_register_kp(iqs7211);
+       if (error)
+               return error;
+
+       error = iqs7211_register_tp(iqs7211);
+       if (error)
+               return error;
+
+       error = iqs7211_init_device(iqs7211);
+       if (error)
+               return error;
+
+       irq = gpiod_to_irq(iqs7211->irq_gpio);
+       if (irq < 0)
+               return irq;
+
+       /* The trigger level follows the GPIO's configured polarity. */
+       irq_flags = gpiod_is_active_low(iqs7211->irq_gpio) ? IRQF_TRIGGER_LOW
+                                                          : IRQF_TRIGGER_HIGH;
+       irq_flags |= IRQF_ONESHOT;
+
+       error = devm_request_threaded_irq(&client->dev, irq, NULL, iqs7211_irq,
+                                         irq_flags, client->name, iqs7211);
+       if (error)
+               dev_err(&client->dev, "Failed to request IRQ: %d\n", error);
+
+       return error;
+}
+
+/*
+ * I2C driver definition: ties together the probe routine, the device tree
+ * match table, the sysfs attribute groups and the system sleep operations.
+ */
+static struct i2c_driver iqs7211_i2c_driver = {
+       .probe = iqs7211_probe,
+       .driver = {
+               .name = "iqs7211",
+               .of_match_table = iqs7211_of_match,
+               .dev_groups = iqs7211_groups,
+               .pm = pm_sleep_ptr(&iqs7211_pm),
+       },
+};
+module_i2c_driver(iqs7211_i2c_driver);
+
+MODULE_AUTHOR("Jeff LaBundy <jeff@labundy.com>");
+MODULE_DESCRIPTION("Azoteq IQS7210A/7211A/E Trackpad/Touchscreen Controller");
+MODULE_LICENSE("GPL");
index 15b5cb7..9bad8b9 100644 (file)
@@ -198,54 +198,36 @@ static void lpc32xx_ts_close(struct input_dev *dev)
 
 static int lpc32xx_ts_probe(struct platform_device *pdev)
 {
+       struct device *dev = &pdev->dev;
        struct lpc32xx_tsc *tsc;
        struct input_dev *input;
-       struct resource *res;
-       resource_size_t size;
        int irq;
        int error;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "Can't get memory resource\n");
-               return -ENOENT;
-       }
-
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;
 
-       tsc = kzalloc(sizeof(*tsc), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!tsc || !input) {
-               dev_err(&pdev->dev, "failed allocating memory\n");
-               error = -ENOMEM;
-               goto err_free_mem;
-       }
+       tsc = devm_kzalloc(dev, sizeof(*tsc), GFP_KERNEL);
+       if (!tsc)
+               return -ENOMEM;
 
-       tsc->dev = input;
        tsc->irq = irq;
 
-       size = resource_size(res);
-
-       if (!request_mem_region(res->start, size, pdev->name)) {
-               dev_err(&pdev->dev, "TSC registers are not free\n");
-               error = -EBUSY;
-               goto err_free_mem;
-       }
+       tsc->tsc_base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(tsc->tsc_base))
+               return PTR_ERR(tsc->tsc_base);
 
-       tsc->tsc_base = ioremap(res->start, size);
-       if (!tsc->tsc_base) {
-               dev_err(&pdev->dev, "Can't map memory\n");
-               error = -ENOMEM;
-               goto err_release_mem;
-       }
-
-       tsc->clk = clk_get(&pdev->dev, NULL);
+       tsc->clk = devm_clk_get(dev, NULL);
        if (IS_ERR(tsc->clk)) {
                dev_err(&pdev->dev, "failed getting clock\n");
-               error = PTR_ERR(tsc->clk);
-               goto err_unmap;
+               return PTR_ERR(tsc->clk);
+       }
+
+       input = devm_input_allocate_device(dev);
+       if (!input) {
+               dev_err(&pdev->dev, "failed allocating input device\n");
+               return -ENOMEM;
        }
 
        input->name = MOD_NAME;
@@ -254,68 +236,33 @@ static int lpc32xx_ts_probe(struct platform_device *pdev)
        input->id.vendor = 0x0001;
        input->id.product = 0x0002;
        input->id.version = 0x0100;
-       input->dev.parent = &pdev->dev;
        input->open = lpc32xx_ts_open;
        input->close = lpc32xx_ts_close;
 
-       input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
-       input->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+       input_set_capability(input, EV_KEY, BTN_TOUCH);
        input_set_abs_params(input, ABS_X, LPC32XX_TSC_MIN_XY_VAL,
                             LPC32XX_TSC_MAX_XY_VAL, 0, 0);
        input_set_abs_params(input, ABS_Y, LPC32XX_TSC_MIN_XY_VAL,
                             LPC32XX_TSC_MAX_XY_VAL, 0, 0);
 
        input_set_drvdata(input, tsc);
+       tsc->dev = input;
 
-       error = request_irq(tsc->irq, lpc32xx_ts_interrupt,
-                           0, pdev->name, tsc);
+       error = devm_request_irq(dev, tsc->irq, lpc32xx_ts_interrupt,
+                                0, pdev->name, tsc);
        if (error) {
                dev_err(&pdev->dev, "failed requesting interrupt\n");
-               goto err_put_clock;
+               return error;
        }
 
        error = input_register_device(input);
        if (error) {
                dev_err(&pdev->dev, "failed registering input device\n");
-               goto err_free_irq;
+               return error;
        }
 
        platform_set_drvdata(pdev, tsc);
-       device_init_wakeup(&pdev->dev, 1);
-
-       return 0;
-
-err_free_irq:
-       free_irq(tsc->irq, tsc);
-err_put_clock:
-       clk_put(tsc->clk);
-err_unmap:
-       iounmap(tsc->tsc_base);
-err_release_mem:
-       release_mem_region(res->start, size);
-err_free_mem:
-       input_free_device(input);
-       kfree(tsc);
-
-       return error;
-}
-
-static int lpc32xx_ts_remove(struct platform_device *pdev)
-{
-       struct lpc32xx_tsc *tsc = platform_get_drvdata(pdev);
-       struct resource *res;
-
-       free_irq(tsc->irq, tsc);
-
-       input_unregister_device(tsc->dev);
-
-       clk_put(tsc->clk);
-
-       iounmap(tsc->tsc_base);
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       release_mem_region(res->start, resource_size(res));
-
-       kfree(tsc);
+       device_init_wakeup(&pdev->dev, true);
 
        return 0;
 }
@@ -384,7 +331,6 @@ MODULE_DEVICE_TABLE(of, lpc32xx_tsc_of_match);
 
 static struct platform_driver lpc32xx_ts_driver = {
        .probe          = lpc32xx_ts_probe,
-       .remove         = lpc32xx_ts_remove,
        .driver         = {
                .name   = MOD_NAME,
                .pm     = LPC32XX_TS_PM_OPS,
index 32896e5..2ac4483 100644 (file)
@@ -1451,13 +1451,8 @@ static int mip4_probe(struct i2c_client *client)
 
        ts->gpio_ce = devm_gpiod_get_optional(&client->dev,
                                              "ce", GPIOD_OUT_LOW);
-       if (IS_ERR(ts->gpio_ce)) {
-               error = PTR_ERR(ts->gpio_ce);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->gpio_ce))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->gpio_ce), "Failed to get gpio\n");
 
        error = mip4_power_on(ts);
        if (error)
index ac12494..af233b6 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/i2c.h>
 #include <linux/input/mt.h>
 #include <linux/input/touchscreen.h>
@@ -43,6 +42,7 @@
 /* Touchscreen absolute values */
 #define MMS114_MAX_AREA                        0xff
 
+#define MMS114_MAX_TOUCHKEYS           15
 #define MMS114_MAX_TOUCH               10
 #define MMS114_EVENT_SIZE              8
 #define MMS136_EVENT_SIZE              6
@@ -70,6 +70,9 @@ struct mms114_data {
        unsigned int            contact_threshold;
        unsigned int            moving_threshold;
 
+       u32 keycodes[MMS114_MAX_TOUCHKEYS];
+       int num_keycodes;
+
        /* Use cache data for mode control register(write only) */
        u8                      cache_mode_control;
 };
@@ -167,11 +170,6 @@ static void mms114_process_mt(struct mms114_data *data, struct mms114_touch *tou
                return;
        }
 
-       if (touch->type != MMS114_TYPE_TOUCHSCREEN) {
-               dev_err(&client->dev, "Wrong touch type (%d)\n", touch->type);
-               return;
-       }
-
        id = touch->id - 1;
        x = touch->x_lo | touch->x_hi << 8;
        y = touch->y_lo | touch->y_hi << 8;
@@ -191,9 +189,33 @@ static void mms114_process_mt(struct mms114_data *data, struct mms114_touch *tou
        }
 }
 
+static void mms114_process_touchkey(struct mms114_data *data,
+                                   struct mms114_touch *touch)
+{
+       struct i2c_client *client = data->client;
+       struct input_dev *input_dev = data->input_dev;
+       unsigned int keycode_id;
+
+       if (touch->id == 0)
+               return;
+
+       if (touch->id > data->num_keycodes) {
+               dev_err(&client->dev, "Wrong touch id for touchkey (%d)\n",
+                       touch->id);
+               return;
+       }
+
+       keycode_id = touch->id - 1;
+       dev_dbg(&client->dev, "keycode id: %d, pressed: %d\n", keycode_id,
+               touch->pressed);
+
+       input_report_key(input_dev, data->keycodes[keycode_id], touch->pressed);
+}
+
 static irqreturn_t mms114_interrupt(int irq, void *dev_id)
 {
        struct mms114_data *data = dev_id;
+       struct i2c_client *client = data->client;
        struct input_dev *input_dev = data->input_dev;
        struct mms114_touch touch[MMS114_MAX_TOUCH];
        int packet_size;
@@ -223,8 +245,22 @@ static irqreturn_t mms114_interrupt(int irq, void *dev_id)
        if (error < 0)
                goto out;
 
-       for (index = 0; index < touch_size; index++)
-               mms114_process_mt(data, touch + index);
+       for (index = 0; index < touch_size; index++) {
+               switch (touch[index].type) {
+               case MMS114_TYPE_TOUCHSCREEN:
+                       mms114_process_mt(data, touch + index);
+                       break;
+
+               case MMS114_TYPE_TOUCHKEY:
+                       mms114_process_touchkey(data, touch + index);
+                       break;
+
+               default:
+                       dev_err(&client->dev, "Wrong touch type (%d)\n",
+                               touch[index].type);
+                       break;
+               }
+       }
 
        input_mt_report_pointer_emulation(data->input_dev, true);
        input_sync(data->input_dev);
@@ -446,6 +482,7 @@ static int mms114_probe(struct i2c_client *client)
        struct input_dev *input_dev;
        const void *match_data;
        int error;
+       int i;
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
                dev_err(&client->dev, "Not supported I2C adapter\n");
@@ -469,6 +506,42 @@ static int mms114_probe(struct i2c_client *client)
 
        data->type = (enum mms_type)match_data;
 
+       data->num_keycodes = device_property_count_u32(&client->dev,
+                                                      "linux,keycodes");
+       if (data->num_keycodes == -EINVAL) {
+               data->num_keycodes = 0;
+       } else if (data->num_keycodes < 0) {
+               dev_err(&client->dev,
+                       "Unable to parse linux,keycodes property: %d\n",
+                       data->num_keycodes);
+               return data->num_keycodes;
+       } else if (data->num_keycodes > MMS114_MAX_TOUCHKEYS) {
+               dev_warn(&client->dev,
+                       "Found %d linux,keycodes but max is %d, ignoring the rest\n",
+                        data->num_keycodes, MMS114_MAX_TOUCHKEYS);
+               data->num_keycodes = MMS114_MAX_TOUCHKEYS;
+       }
+
+       if (data->num_keycodes > 0) {
+               error = device_property_read_u32_array(&client->dev,
+                                                      "linux,keycodes",
+                                                      data->keycodes,
+                                                      data->num_keycodes);
+               if (error) {
+                       dev_err(&client->dev,
+                               "Unable to read linux,keycodes values: %d\n",
+                               error);
+                       return error;
+               }
+
+               input_dev->keycode = data->keycodes;
+               input_dev->keycodemax = data->num_keycodes;
+               input_dev->keycodesize = sizeof(data->keycodes[0]);
+               for (i = 0; i < data->num_keycodes; i++)
+                       input_set_capability(input_dev,
+                                            EV_KEY, data->keycodes[i]);
+       }
+
        input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_X);
        input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_Y);
        input_set_abs_params(input_dev, ABS_MT_PRESSURE, 0, 255, 0, 0);
index 7f7d879..1a797e4 100644 (file)
@@ -1,9 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * Driver for Novatek i2c touchscreen controller as found on
- * the Acer Iconia One 7 B1-750 tablet. The Touchscreen controller
- * model-number is unknown. Android calls this a "NVT-ts" touchscreen,
- * but that may apply to other Novatek controller models too.
+ * Driver for Novatek NT11205 i2c touchscreen controller as found
+ * on the Acer Iconia One 7 B1-750 tablet.
  *
  * Copyright (c) 2023 Hans de Goede <hdegoede@redhat.com>
  */
@@ -272,7 +270,7 @@ static int nvt_ts_probe(struct i2c_client *client)
 
        error = input_register_device(input);
        if (error) {
-               dev_err(dev, "failed to request irq: %d\n", error);
+               dev_err(dev, "failed to register input device: %d\n", error);
                return error;
        }
 
@@ -296,6 +294,6 @@ static struct i2c_driver nvt_ts_driver = {
 
 module_i2c_driver(nvt_ts_driver);
 
-MODULE_DESCRIPTION("Novatek NVT-ts touchscreen driver");
+MODULE_DESCRIPTION("Novatek NT11205 touchscreen driver");
 MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
 MODULE_LICENSE("GPL");
index 554e179..4ede068 100644 (file)
@@ -13,8 +13,8 @@
 #include <linux/input/mt.h>
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
-#include <linux/of_device.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/slab.h>
 
 #define PIXCIR_MAX_SLOTS       5 /* Max fingers supported by driver */
@@ -515,41 +515,27 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client)
        input_set_drvdata(input, tsdata);
 
        tsdata->gpio_attb = devm_gpiod_get(dev, "attb", GPIOD_IN);
-       if (IS_ERR(tsdata->gpio_attb)) {
-               error = PTR_ERR(tsdata->gpio_attb);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to request ATTB gpio: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(tsdata->gpio_attb))
+               return dev_err_probe(dev, PTR_ERR(tsdata->gpio_attb),
+                                    "Failed to request ATTB gpio\n");
 
        tsdata->gpio_reset = devm_gpiod_get_optional(dev, "reset",
                                                     GPIOD_OUT_LOW);
-       if (IS_ERR(tsdata->gpio_reset)) {
-               error = PTR_ERR(tsdata->gpio_reset);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to request RESET gpio: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(tsdata->gpio_reset))
+               return dev_err_probe(dev, PTR_ERR(tsdata->gpio_reset),
+                                    "Failed to request RESET gpio\n");
 
        tsdata->gpio_wake = devm_gpiod_get_optional(dev, "wake",
                                                    GPIOD_OUT_HIGH);
-       if (IS_ERR(tsdata->gpio_wake)) {
-               error = PTR_ERR(tsdata->gpio_wake);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get wake gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(tsdata->gpio_wake))
+               return dev_err_probe(dev, PTR_ERR(tsdata->gpio_wake),
+                                    "Failed to get wake gpio\n");
 
        tsdata->gpio_enable = devm_gpiod_get_optional(dev, "enable",
                                                      GPIOD_OUT_HIGH);
-       if (IS_ERR(tsdata->gpio_enable)) {
-               error = PTR_ERR(tsdata->gpio_enable);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get enable gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(tsdata->gpio_enable))
+               return dev_err_probe(dev, PTR_ERR(tsdata->gpio_enable),
+                                    "Failed to get enable gpio\n");
 
        if (tsdata->gpio_enable)
                msleep(100);
index 76e7d62..78dd305 100644 (file)
@@ -1087,32 +1087,20 @@ static int raydium_i2c_probe(struct i2c_client *client)
        i2c_set_clientdata(client, ts);
 
        ts->avdd = devm_regulator_get(&client->dev, "avdd");
-       if (IS_ERR(ts->avdd)) {
-               error = PTR_ERR(ts->avdd);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get 'avdd' regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->avdd))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->avdd),
+                                    "Failed to get 'avdd' regulator\n");
 
        ts->vccio = devm_regulator_get(&client->dev, "vccio");
-       if (IS_ERR(ts->vccio)) {
-               error = PTR_ERR(ts->vccio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get 'vccio' regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->vccio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->vccio),
+                                    "Failed to get 'vccio' regulator\n");
 
        ts->reset_gpio = devm_gpiod_get_optional(&client->dev, "reset",
                                                 GPIOD_OUT_LOW);
-       if (IS_ERR(ts->reset_gpio)) {
-               error = PTR_ERR(ts->reset_gpio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "failed to get reset gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->reset_gpio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->reset_gpio),
+                                    "Failed to get reset gpio\n");
 
        error = raydium_i2c_power_on(ts);
        if (error)
index 6f754a8..7e761ec 100644 (file)
@@ -210,12 +210,8 @@ static int grts_probe(struct platform_device *pdev)
 
        /* get the channels from IIO device */
        st->iio_chans = devm_iio_channel_get_all(dev);
-       if (IS_ERR(st->iio_chans)) {
-               error = PTR_ERR(st->iio_chans);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "can't get iio channels.\n");
-               return error;
-       }
+       if (IS_ERR(st->iio_chans))
+               return dev_err_probe(dev, PTR_ERR(st->iio_chans), "can't get iio channels\n");
 
        if (!device_property_present(dev, "io-channel-names"))
                return -ENODEV;
index 9e28f96..62f562a 100644 (file)
@@ -706,11 +706,9 @@ static int silead_ts_probe(struct i2c_client *client)
 
        /* Power GPIO pin */
        data->gpio_power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
-       if (IS_ERR(data->gpio_power)) {
-               if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER)
-                       dev_err(dev, "Shutdown GPIO request failed\n");
-               return PTR_ERR(data->gpio_power);
-       }
+       if (IS_ERR(data->gpio_power))
+               return dev_err_probe(dev, PTR_ERR(data->gpio_power),
+                                    "Shutdown GPIO request failed\n");
 
        error = silead_ts_setup(client);
        if (error)
index 426564d..ed56cb5 100644 (file)
@@ -310,23 +310,15 @@ static int sis_ts_probe(struct i2c_client *client)
 
        ts->attn_gpio = devm_gpiod_get_optional(&client->dev,
                                                "attn", GPIOD_IN);
-       if (IS_ERR(ts->attn_gpio)) {
-               error = PTR_ERR(ts->attn_gpio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get attention GPIO: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->attn_gpio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->attn_gpio),
+                                    "Failed to get attention GPIO\n");
 
        ts->reset_gpio = devm_gpiod_get_optional(&client->dev,
                                                 "reset", GPIOD_OUT_LOW);
-       if (IS_ERR(ts->reset_gpio)) {
-               error = PTR_ERR(ts->reset_gpio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get reset GPIO: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->reset_gpio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->reset_gpio),
+                                    "Failed to get reset GPIO\n");
 
        sis_ts_reset(ts);
 
index 31d1402..7efbcd0 100644 (file)
@@ -221,7 +221,6 @@ static void surface3_spi_power(struct surface3_ts_data *data, bool on)
  */
 static int surface3_spi_get_gpio_config(struct surface3_ts_data *data)
 {
-       int error;
        struct device *dev;
        struct gpio_desc *gpiod;
        int i;
@@ -231,15 +230,9 @@ static int surface3_spi_get_gpio_config(struct surface3_ts_data *data)
        /* Get the reset lines GPIO pin number */
        for (i = 0; i < 2; i++) {
                gpiod = devm_gpiod_get_index(dev, NULL, i, GPIOD_OUT_LOW);
-               if (IS_ERR(gpiod)) {
-                       error = PTR_ERR(gpiod);
-                       if (error != -EPROBE_DEFER)
-                               dev_err(dev,
-                                       "Failed to get power GPIO %d: %d\n",
-                                       i,
-                                       error);
-                       return error;
-               }
+               if (IS_ERR(gpiod))
+                       return dev_err_probe(dev, PTR_ERR(gpiod),
+                                            "Failed to get power GPIO %d\n", i);
 
                data->gpiod_rst[i] = gpiod;
        }
index 0293c49..f5c5881 100644 (file)
@@ -323,13 +323,9 @@ static int sx8654_probe(struct i2c_client *client)
 
        sx8654->gpio_reset = devm_gpiod_get_optional(&client->dev, "reset",
                                                     GPIOD_OUT_HIGH);
-       if (IS_ERR(sx8654->gpio_reset)) {
-               error = PTR_ERR(sx8654->gpio_reset);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev, "unable to get reset-gpio: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(sx8654->gpio_reset))
+               return dev_err_probe(&client->dev, PTR_ERR(sx8654->gpio_reset),
+                                    "unable to get reset-gpio\n");
        dev_dbg(&client->dev, "got GPIO reset pin\n");
 
        sx8654->data = device_get_match_data(&client->dev);
index decf2d2..9aa4e35 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/sort.h>
 #include <linux/pm_wakeirq.h>
 
index 22243ca..537f7bf 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/io.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
+#include <linux/of.h>
 
 #define INTR_STAT_OFS  0x0
 #define INTR_SET_OFS   0x8
index aa0a4d8..27a510d 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #define INTR_STAT_OFS  0x0
 #define INTR_SET_OFS   0x8
index bf6e86b..a2b8839 100644 (file)
@@ -1501,16 +1501,12 @@ static int flexrm_mbox_probe(struct platform_device *pdev)
        mbox->dev = dev;
        platform_set_drvdata(pdev, mbox);
 
-       /* Get resource for registers */
-       iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       /* Get resource for registers and map registers of all rings */
+       mbox->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &iomem);
        if (!iomem || (resource_size(iomem) < RING_REGS_SIZE)) {
                ret = -ENODEV;
                goto fail;
-       }
-
-       /* Map registers of all rings */
-       mbox->regs = devm_ioremap_resource(&pdev->dev, iomem);
-       if (IS_ERR(mbox->regs)) {
+       } else if (IS_ERR(mbox->regs)) {
                ret = PTR_ERR(mbox->regs);
                goto fail;
        }
index 8c95e3c..d67db63 100644 (file)
@@ -694,7 +694,7 @@ pdc_receive(struct pdc_state *pdcs)
  * pdc_tx_list_sg_add() - Add the buffers in a scatterlist to the transmit
  * descriptors for a given SPU. The scatterlist buffers contain the data for a
  * SPU request message.
- * @spu_idx:   The index of the SPU to submit the request to, [0, max_spu)
+ * @pdcs:      PDC state for the SPU that will process this request
  * @sg:        Scatterlist whose buffers contain part of the SPU request
  *
  * If a scatterlist buffer is larger than PDC_DMA_BUF_MAX, multiple descriptors
@@ -861,7 +861,7 @@ static int pdc_rx_list_init(struct pdc_state *pdcs, struct scatterlist *dst_sg,
  * pdc_rx_list_sg_add() - Add the buffers in a scatterlist to the receive
  * descriptors for a given SPU. The caller must have already DMA mapped the
  * scatterlist.
- * @spu_idx:    Indicates which SPU the buffers are for
+ * @pdcs:       PDC state for the SPU that will process this request
  * @sg:         Scatterlist whose buffers are added to the receive ring
  *
  * If a receive buffer in the scatterlist is larger than PDC_DMA_BUF_MAX,
@@ -960,7 +960,7 @@ static irqreturn_t pdc_irq_handler(int irq, void *data)
 /**
  * pdc_tasklet_cb() - Tasklet callback that runs the deferred processing after
  * a DMA receive interrupt. Reenables the receive interrupt.
- * @data: PDC state structure
+ * @t: Pointer to the tasklet being run
  */
 static void pdc_tasklet_cb(struct tasklet_struct *t)
 {
@@ -1566,19 +1566,13 @@ static int pdc_probe(struct platform_device *pdev)
        if (err)
                goto cleanup_ring_pool;
 
-       pdc_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!pdc_regs) {
-               err = -ENODEV;
-               goto cleanup_ring_pool;
-       }
-       dev_dbg(dev, "PDC register region res.start = %pa, res.end = %pa",
-               &pdc_regs->start, &pdc_regs->end);
-
-       pdcs->pdc_reg_vbase = devm_ioremap_resource(&pdev->dev, pdc_regs);
+       pdcs->pdc_reg_vbase = devm_platform_get_and_ioremap_resource(pdev, 0, &pdc_regs);
        if (IS_ERR(pdcs->pdc_reg_vbase)) {
                err = PTR_ERR(pdcs->pdc_reg_vbase);
                goto cleanup_ring_pool;
        }
+       dev_dbg(dev, "PDC register region res.start = %pa, res.end = %pa",
+               &pdc_regs->start, &pdc_regs->end);
 
        /* create rx buffer pool after dt read to know how big buffers are */
        err = pdc_rx_buf_pool_create(pdcs);
index ab24e73..17c29e9 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/iopoll.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
index 1c73c63..f77741c 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/kfifo.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
index 20f2ec8..3ef4dd8 100644 (file)
@@ -14,7 +14,8 @@
 #include <linux/kernel.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/suspend.h>
 #include <linux/slab.h>
index 162df49..20ee283 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/mailbox_controller.h>
 #include <soc/microchip/mpfs.h>
index fc6a12a..22d6018 100644 (file)
@@ -367,8 +367,7 @@ static int mbox_test_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        /* It's okay for MMIO to be NULL */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       tdev->tx_mmio = devm_ioremap_resource(&pdev->dev, res);
+       tdev->tx_mmio = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (PTR_ERR(tdev->tx_mmio) == -EBUSY) {
                /* if reserved area in SRAM, try just ioremap */
                size = resource_size(res);
@@ -378,8 +377,7 @@ static int mbox_test_probe(struct platform_device *pdev)
        }
 
        /* If specified, second reg entry is Rx MMIO */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       tdev->rx_mmio = devm_ioremap_resource(&pdev->dev, res);
+       tdev->rx_mmio = devm_platform_get_and_ioremap_resource(pdev, 1, &res);
        if (PTR_ERR(tdev->rx_mmio) == -EBUSY) {
                size = resource_size(res);
                tdev->rx_mmio = devm_ioremap(&pdev->dev, res->start, size);
@@ -390,7 +388,7 @@ static int mbox_test_probe(struct platform_device *pdev)
        tdev->tx_channel = mbox_test_request_channel(pdev, "tx");
        tdev->rx_channel = mbox_test_request_channel(pdev, "rx");
 
-       if (!tdev->tx_channel && !tdev->rx_channel)
+       if (IS_ERR_OR_NULL(tdev->tx_channel) && IS_ERR_OR_NULL(tdev->rx_channel))
                return -EPROBE_DEFER;
 
        /* If Rx is not specified but has Rx MMIO, then Rx = Tx */
index adf36c0..ebff3ba 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/bitops.h>
 #include <linux/mailbox_client.h>
 #include <linux/mailbox_controller.h>
+#include <linux/of.h>
 
 #include "mailbox.h"
 
index 14bc005..91487aa 100644 (file)
@@ -10,7 +10,8 @@
 #include <linux/kernel.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 struct mtk_adsp_mbox_priv {
index b18d47e..4d62b07 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/mailbox_controller.h>
 #include <linux/mailbox/mtk-cmdq-mailbox.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 #define CMDQ_OP_CODE_MASK              (0xff << CMDQ_OP_CODE_SHIFT)
 #define CMDQ_NUM_CMD(t)                        (t->cmd_buf_size / CMDQ_INST_SIZE)
index fa2ce32..792bcae 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/kfifo.h>
 #include <linux/err.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/omap-mailbox.h>
index a5922ac..834aecd 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/mailbox_controller.h>
@@ -135,10 +136,8 @@ static int platform_mhu_probe(struct platform_device *pdev)
        for (i = 0; i < MHU_CHANS; i++) {
                mhu->chan[i].con_priv = &mhu->mlink[i];
                mhu->mlink[i].irq = platform_get_irq(pdev, i);
-               if (mhu->mlink[i].irq < 0) {
-                       dev_err(dev, "failed to get irq%d\n", i);
+               if (mhu->mlink[i].irq < 0)
                        return mhu->mlink[i].irq;
-               }
                mhu->mlink[i].rx_reg = mhu->base + platform_mhu_reg[i];
                mhu->mlink[i].tx_reg = mhu->mlink[i].rx_reg + TX_REG_OFFSET;
        }
index 7e27acf..f597a1b 100644 (file)
@@ -227,10 +227,8 @@ static int qcom_ipcc_setup_mbox(struct qcom_ipcc *ipcc,
                        ret = of_parse_phandle_with_args(client_dn, "mboxes",
                                                "#mbox-cells", j, &curr_ph);
                        of_node_put(curr_ph.np);
-                       if (!ret && curr_ph.np == controller_dn) {
+                       if (!ret && curr_ph.np == controller_dn)
                                ipcc->num_chans++;
-                               break;
-                       }
                }
        }
 
index 116286e..8ffad05 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/mailbox_controller.h>
+#include <linux/of.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 
 #define MAILBOX_A2B_INTEN              0x00
@@ -194,11 +194,7 @@ static int rockchip_mbox_probe(struct platform_device *pdev)
        mb->mbox.ops = &rockchip_mbox_chan_ops;
        mb->mbox.txdone_irq = true;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
-       mb->mbox_base = devm_ioremap_resource(&pdev->dev, res);
+       mb->mbox_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(mb->mbox_base))
                return PTR_ERR(mb->mbox_base);
 
index e3c899a..9ae57de 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/io.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 
index 15d538f..4ad3653 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/io.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
 
index 7f98e74..fe29fc2 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/io.h>
 #include <linux/mailbox_controller.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
@@ -728,7 +727,6 @@ static int tegra_hsp_request_shared_irq(struct tegra_hsp *hsp)
 static int tegra_hsp_probe(struct platform_device *pdev)
 {
        struct tegra_hsp *hsp;
-       struct resource *res;
        unsigned int i;
        u32 value;
        int err;
@@ -742,8 +740,7 @@ static int tegra_hsp_probe(struct platform_device *pdev)
        INIT_LIST_HEAD(&hsp->doorbells);
        spin_lock_init(&hsp->lock);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       hsp->regs = devm_ioremap_resource(&pdev->dev, res);
+       hsp->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(hsp->regs))
                return PTR_ERR(hsp->regs);
 
index 03048cb..a94577f 100644 (file)
@@ -812,7 +812,6 @@ static int ti_msgmgr_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        const struct of_device_id *of_id;
        struct device_node *np;
-       struct resource *res;
        const struct ti_msgmgr_desc *desc;
        struct ti_msgmgr_inst *inst;
        struct ti_queue_inst *qinst;
@@ -843,22 +842,19 @@ static int ti_msgmgr_probe(struct platform_device *pdev)
        inst->dev = dev;
        inst->desc = desc;
 
-       res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-                                          desc->data_region_name);
-       inst->queue_proxy_region = devm_ioremap_resource(dev, res);
+       inst->queue_proxy_region =
+               devm_platform_ioremap_resource_byname(pdev, desc->data_region_name);
        if (IS_ERR(inst->queue_proxy_region))
                return PTR_ERR(inst->queue_proxy_region);
 
-       res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-                                          desc->status_region_name);
-       inst->queue_state_debug_region = devm_ioremap_resource(dev, res);
+       inst->queue_state_debug_region =
+               devm_platform_ioremap_resource_byname(pdev, desc->status_region_name);
        if (IS_ERR(inst->queue_state_debug_region))
                return PTR_ERR(inst->queue_state_debug_region);
 
        if (desc->is_sproxy) {
-               res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-                                                  desc->ctrl_region_name);
-               inst->queue_ctrl_region = devm_ioremap_resource(dev, res);
+               inst->queue_ctrl_region =
+                       devm_platform_ioremap_resource_byname(pdev, desc->ctrl_region_name);
                if (IS_ERR(inst->queue_ctrl_region))
                        return PTR_ERR(inst->queue_ctrl_region);
        }
index d097f45..e4fcac9 100644 (file)
@@ -16,8 +16,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 
 /* IPI agent ID any */
index 6673122..42db767 100644 (file)
@@ -2335,13 +2335,27 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port)
 {
        struct ksz_device *dev = ds->priv;
 
-       if (dev->chip_id == KSZ8830_CHIP_ID) {
+       switch (dev->chip_id) {
+       case KSZ8830_CHIP_ID:
                /* Silicon Errata Sheet (DS80000830A):
                 * Port 1 does not work with LinkMD Cable-Testing.
                 * Port 1 does not respond to received PAUSE control frames.
                 */
                if (!port)
                        return MICREL_KSZ8_P1_ERRATA;
+               break;
+       case KSZ9477_CHIP_ID:
+               /* KSZ9477 Errata DS80000754C
+                *
+                * Module 4: Energy Efficient Ethernet (EEE) feature select must
+                * be manually disabled
+                *   The EEE feature is enabled by default, but it is not fully
+                *   operational. It must be manually disabled through register
+                *   controls. If not disabled, the PHY ports can auto-negotiate
+                *   to enable EEE, and this feature can cause link drops when
+                *   linked to another device supporting EEE.
+                */
+               return MICREL_NO_EEE;
        }
 
        return 0;
index dee35ba..0617d5c 100644 (file)
@@ -132,6 +132,8 @@ struct sja1105_info {
        int max_frame_mem;
        int num_ports;
        bool multiple_cascade_ports;
+       /* Every {port, TXQ} has its own CBS shaper */
+       bool fixed_cbs_mapping;
        enum dsa_tag_protocol tag_proto;
        const struct sja1105_dynamic_table_ops *dyn_ops;
        const struct sja1105_table_ops *static_ops;
index 331bb1c..a23d980 100644 (file)
@@ -2115,11 +2115,36 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
 }
 
 #define BYTES_PER_KBIT (1000LL / 8)
+/* Port 0 (the uC port) does not have CBS shapers */
+#define SJA1110_FIXED_CBS(port, prio) ((((port) - 1) * SJA1105_NUM_TC) + (prio))
+
+static int sja1105_find_cbs_shaper(struct sja1105_private *priv,
+                                  int port, int prio)
+{
+       int i;
+
+       if (priv->info->fixed_cbs_mapping) {
+               i = SJA1110_FIXED_CBS(port, prio);
+               if (i >= 0 && i < priv->info->num_cbs_shapers)
+                       return i;
+
+               return -1;
+       }
+
+       for (i = 0; i < priv->info->num_cbs_shapers; i++)
+               if (priv->cbs[i].port == port && priv->cbs[i].prio == prio)
+                       return i;
+
+       return -1;
+}
 
 static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
 {
        int i;
 
+       if (priv->info->fixed_cbs_mapping)
+               return -1;
+
        for (i = 0; i < priv->info->num_cbs_shapers; i++)
                if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope)
                        return i;
@@ -2150,14 +2175,20 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
 {
        struct sja1105_private *priv = ds->priv;
        struct sja1105_cbs_entry *cbs;
+       s64 port_transmit_rate_kbps;
        int index;
 
        if (!offload->enable)
                return sja1105_delete_cbs_shaper(priv, port, offload->queue);
 
-       index = sja1105_find_unused_cbs_shaper(priv);
-       if (index < 0)
-               return -ENOSPC;
+       /* The user may be replacing an existing shaper */
+       index = sja1105_find_cbs_shaper(priv, port, offload->queue);
+       if (index < 0) {
+               /* That isn't the case - see if we can allocate a new one */
+               index = sja1105_find_unused_cbs_shaper(priv);
+               if (index < 0)
+                       return -ENOSPC;
+       }
 
        cbs = &priv->cbs[index];
        cbs->port = port;
@@ -2167,9 +2198,17 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
         */
        cbs->credit_hi = offload->hicredit;
        cbs->credit_lo = abs(offload->locredit);
-       /* User space is in kbits/sec, hardware in bytes/sec */
-       cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT;
-       cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT);
+       /* User space is in kbits/sec, while the hardware in bytes/sec times
+        * link speed. Since the given offload->sendslope is good only for the
+        * current link speed anyway, and user space is likely to reprogram it
+        * when that changes, don't even bother to track the port's link speed,
+        * but deduce the port transmit rate from idleslope - sendslope.
+        */
+       port_transmit_rate_kbps = offload->idleslope - offload->sendslope;
+       cbs->idle_slope = div_s64(offload->idleslope * BYTES_PER_KBIT,
+                                 port_transmit_rate_kbps);
+       cbs->send_slope = div_s64(abs(offload->sendslope * BYTES_PER_KBIT),
+                                 port_transmit_rate_kbps);
        /* Convert the negative values from 64-bit 2's complement
         * to 32-bit 2's complement (for the case of 0x80000000 whose
         * negative is still negative).
index 5ce29c8..834b5c1 100644 (file)
@@ -781,6 +781,7 @@ const struct sja1105_info sja1110a_info = {
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
+       .fixed_cbs_mapping      = true,
        .ptp_ts_bits            = 32,
        .ptpegr_ts_bytes        = 8,
        .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
@@ -831,6 +832,7 @@ const struct sja1105_info sja1110b_info = {
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
+       .fixed_cbs_mapping      = true,
        .ptp_ts_bits            = 32,
        .ptpegr_ts_bytes        = 8,
        .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
@@ -881,6 +883,7 @@ const struct sja1105_info sja1110c_info = {
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
+       .fixed_cbs_mapping      = true,
        .ptp_ts_bits            = 32,
        .ptpegr_ts_bytes        = 8,
        .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
@@ -931,6 +934,7 @@ const struct sja1105_info sja1110d_info = {
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
+       .fixed_cbs_mapping      = true,
        .ptp_ts_bits            = 32,
        .ptpegr_ts_bytes        = 8,
        .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
index e0a4cb7..c153dc0 100644 (file)
@@ -1402,7 +1402,7 @@ static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
                return;
 
        si = enetc_psi_create(pdev);
-       if (si)
+       if (!IS_ERR(si))
                enetc_psi_destroy(pdev);
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
index ea0e38b..f281e42 100644 (file)
@@ -570,7 +570,10 @@ static int gve_rx_append_frags(struct napi_struct *napi,
                if (!skb)
                        return -1;
 
-               skb_shinfo(rx->ctx.skb_tail)->frag_list = skb;
+               if (rx->ctx.skb_tail == rx->ctx.skb_head)
+                       skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
+               else
+                       rx->ctx.skb_tail->next = skb;
                rx->ctx.skb_tail = skb;
                num_frags = 0;
        }
index a4b43bc..aaf1f42 100644 (file)
@@ -814,6 +814,7 @@ struct hnae3_tc_info {
        u8 max_tc; /* Total number of TCs */
        u8 num_tc; /* Total number of enabled TCs */
        bool mqprio_active;
+       bool dcb_ets_active;
 };
 
 #define HNAE3_MAX_DSCP                 64
index f276b5e..b850853 100644 (file)
@@ -1045,6 +1045,7 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos)
        struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
        struct hnae3_dev_specs *dev_specs = &ae_dev->dev_specs;
        struct hnae3_knic_private_info *kinfo = &h->kinfo;
+       struct net_device *dev = kinfo->netdev;
 
        *pos += scnprintf(buf + *pos, len - *pos, "dev_spec:\n");
        *pos += scnprintf(buf + *pos, len - *pos, "MAC entry num: %u\n",
@@ -1087,6 +1088,9 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos)
                          dev_specs->mc_mac_size);
        *pos += scnprintf(buf + *pos, len - *pos, "MAC statistics number: %u\n",
                          dev_specs->mac_stats_num);
+       *pos += scnprintf(buf + *pos, len - *pos,
+                         "TX timeout threshold: %d seconds\n",
+                         dev->watchdog_timeo / HZ);
 }
 
 static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len)
@@ -1411,9 +1415,9 @@ int hns3_dbg_init(struct hnae3_handle *handle)
        return 0;
 
 out:
-       mutex_destroy(&handle->dbgfs_lock);
        debugfs_remove_recursive(handle->hnae3_dbgfs);
        handle->hnae3_dbgfs = NULL;
+       mutex_destroy(&handle->dbgfs_lock);
        return ret;
 }
 
@@ -1421,6 +1425,9 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
 {
        u32 i;
 
+       debugfs_remove_recursive(handle->hnae3_dbgfs);
+       handle->hnae3_dbgfs = NULL;
+
        for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++)
                if (handle->dbgfs_buf[i]) {
                        kvfree(handle->dbgfs_buf[i]);
@@ -1428,8 +1435,6 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
                }
 
        mutex_destroy(&handle->dbgfs_lock);
-       debugfs_remove_recursive(handle->hnae3_dbgfs);
-       handle->hnae3_dbgfs = NULL;
 }
 
 void hns3_dbg_register_debugfs(const char *debugfs_dir_name)
index eac2d05..b4895c7 100644 (file)
@@ -2103,8 +2103,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
         */
        if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num &&
            !ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) {
+               /* This smp_store_release() pairs with smp_load_acquire() in
+                * hns3_nic_reclaim_desc(). Ensure that the BD valid bit
+                * is updated.
+                */
+               smp_store_release(&ring->last_to_use, ring->next_to_use);
                hns3_tx_push_bd(ring, num);
-               WRITE_ONCE(ring->last_to_use, ring->next_to_use);
                return;
        }
 
@@ -2115,6 +2119,11 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
                return;
        }
 
+       /* This smp_store_release() pairs with smp_load_acquire() in
+        * hns3_nic_reclaim_desc(). Ensure that the BD valid bit is updated.
+        */
+       smp_store_release(&ring->last_to_use, ring->next_to_use);
+
        if (ring->tqp->mem_base)
                hns3_tx_mem_doorbell(ring);
        else
@@ -2122,7 +2131,6 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
                       ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
 
        ring->pending_buf = 0;
-       WRITE_ONCE(ring->last_to_use, ring->next_to_use);
 }
 
 static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb,
@@ -3308,8 +3316,6 @@ static void hns3_set_default_feature(struct net_device *netdev)
 
        netdev->priv_flags |= IFF_UNICAST_FLT;
 
-       netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
-
        netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
                NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
                NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
@@ -3563,9 +3569,8 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
 static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
                                  int *bytes, int *pkts, int budget)
 {
-       /* pair with ring->last_to_use update in hns3_tx_doorbell(),
-        * smp_store_release() is not used in hns3_tx_doorbell() because
-        * the doorbell operation already have the needed barrier operation.
+       /* This smp_load_acquire() pairs with smp_store_release() in
+        * hns3_tx_doorbell().
         */
        int ltu = smp_load_acquire(&ring->last_to_use);
        int ntc = ring->next_to_clean;
index 36858a7..682239f 100644 (file)
@@ -773,7 +773,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
                hns3_get_ksettings(h, cmd);
                break;
        case HNAE3_MEDIA_TYPE_FIBER:
-               if (module_type == HNAE3_MODULE_TYPE_CR)
+               if (module_type == HNAE3_MODULE_TYPE_UNKNOWN)
+                       cmd->base.port = PORT_OTHER;
+               else if (module_type == HNAE3_MODULE_TYPE_CR)
                        cmd->base.port = PORT_DA;
                else
                        cmd->base.port = PORT_FIBRE;
index fad5a5f..b98301e 100644 (file)
@@ -259,7 +259,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
        int ret;
 
        if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
-           hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+           h->kinfo.tc_info.mqprio_active)
                return -EINVAL;
 
        ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
@@ -275,10 +275,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
        }
 
        hclge_tm_schd_info_update(hdev, num_tc);
-       if (num_tc > 1)
-               hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
-       else
-               hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+       h->kinfo.tc_info.dcb_ets_active = num_tc > 1;
 
        ret = hclge_ieee_ets_to_tm_info(hdev, ets);
        if (ret)
@@ -487,7 +484,7 @@ static u8 hclge_getdcbx(struct hnae3_handle *h)
        struct hclge_vport *vport = hclge_get_vport(h);
        struct hclge_dev *hdev = vport->back;
 
-       if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+       if (h->kinfo.tc_info.mqprio_active)
                return 0;
 
        return hdev->dcbx_cap;
@@ -611,7 +608,8 @@ static int hclge_setup_tc(struct hnae3_handle *h,
        if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state))
                return -EBUSY;
 
-       if (hdev->flag & HCLGE_FLAG_DCB_ENABLE)
+       kinfo = &vport->nic.kinfo;
+       if (kinfo->tc_info.dcb_ets_active)
                return -EINVAL;
 
        ret = hclge_mqprio_qopt_check(hdev, mqprio_qopt);
@@ -625,7 +623,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
        if (ret)
                return ret;
 
-       kinfo = &vport->nic.kinfo;
        memcpy(&old_tc_info, &kinfo->tc_info, sizeof(old_tc_info));
        hclge_sync_mqprio_qopt(&kinfo->tc_info, mqprio_qopt);
        kinfo->tc_info.mqprio_active = tc > 0;
@@ -634,13 +631,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
        if (ret)
                goto err_out;
 
-       hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
-
-       if (tc > 1)
-               hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE;
-       else
-               hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE;
-
        return hclge_notify_init_up(hdev);
 
 err_out:
index f01a7a9..ff3f8f4 100644 (file)
@@ -1519,7 +1519,7 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
        struct hclge_desc desc[3];
        int pos = 0;
        int ret, i;
-       u32 *req;
+       __le32 *req;
 
        hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, true);
        desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
@@ -1544,22 +1544,22 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
                         tcam_msg.loc);
 
        /* tcam_data0 ~ tcam_data1 */
-       req = (u32 *)req1->tcam_data;
+       req = (__le32 *)req1->tcam_data;
        for (i = 0; i < 2; i++)
                pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
-                                "%08x\n", *req++);
+                                "%08x\n", le32_to_cpu(*req++));
 
        /* tcam_data2 ~ tcam_data7 */
-       req = (u32 *)req2->tcam_data;
+       req = (__le32 *)req2->tcam_data;
        for (i = 0; i < 6; i++)
                pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
-                                "%08x\n", *req++);
+                                "%08x\n", le32_to_cpu(*req++));
 
        /* tcam_data8 ~ tcam_data12 */
-       req = (u32 *)req3->tcam_data;
+       req = (__le32 *)req3->tcam_data;
        for (i = 0; i < 5; i++)
                pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
-                                "%08x\n", *req++);
+                                "%08x\n", le32_to_cpu(*req++));
 
        return ret;
 }
index 0f50dba..8ca3684 100644 (file)
@@ -11026,6 +11026,7 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle,
 
 static void hclge_info_show(struct hclge_dev *hdev)
 {
+       struct hnae3_handle *handle = &hdev->vport->nic;
        struct device *dev = &hdev->pdev->dev;
 
        dev_info(dev, "PF info begin:\n");
@@ -11042,9 +11043,9 @@ static void hclge_info_show(struct hclge_dev *hdev)
        dev_info(dev, "This is %s PF\n",
                 hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
        dev_info(dev, "DCB %s\n",
-                hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
+                handle->kinfo.tc_info.dcb_ets_active ? "enable" : "disable");
        dev_info(dev, "MQPRIO %s\n",
-                hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
+                handle->kinfo.tc_info.mqprio_active ? "enable" : "disable");
        dev_info(dev, "Default tx spare buffer size: %u\n",
                 hdev->tx_spare_buf_size);
 
index ec233ec..7bc2049 100644 (file)
@@ -919,8 +919,6 @@ struct hclge_dev {
 
 #define HCLGE_FLAG_MAIN                        BIT(0)
 #define HCLGE_FLAG_DCB_CAPABLE         BIT(1)
-#define HCLGE_FLAG_DCB_ENABLE          BIT(2)
-#define HCLGE_FLAG_MQPRIO_ENABLE       BIT(3)
        u32 flag;
 
        u32 pkt_buf_size; /* Total pf buf size for tx/rx */
index 015b781..a2b7595 100644 (file)
@@ -34,11 +34,11 @@ struct igb_adapter;
 /* TX/RX descriptor defines */
 #define IGB_DEFAULT_TXD                256
 #define IGB_DEFAULT_TX_WORK    128
-#define IGB_MIN_TXD            80
+#define IGB_MIN_TXD            64
 #define IGB_MAX_TXD            4096
 
 #define IGB_DEFAULT_RXD                256
-#define IGB_MIN_RXD            80
+#define IGB_MIN_RXD            64
 #define IGB_MAX_RXD            4096
 
 #define IGB_DEFAULT_ITR                3 /* dynamic */
index 1ab787e..13ba9c7 100644 (file)
@@ -3933,8 +3933,9 @@ static void igb_probe_vfs(struct igb_adapter *adapter)
        struct pci_dev *pdev = adapter->pdev;
        struct e1000_hw *hw = &adapter->hw;
 
-       /* Virtualization features not supported on i210 family. */
-       if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
+       /* Virtualization features not supported on i210 and 82580 family. */
+       if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) ||
+           (hw->mac.type == e1000_82580))
                return;
 
        /* Of the below we really only want the effect of getting
index 57d39ee..7b83678 100644 (file)
@@ -39,11 +39,11 @@ enum latency_range {
 /* Tx/Rx descriptor defines */
 #define IGBVF_DEFAULT_TXD      256
 #define IGBVF_MAX_TXD          4096
-#define IGBVF_MIN_TXD          80
+#define IGBVF_MIN_TXD          64
 
 #define IGBVF_DEFAULT_RXD      256
 #define IGBVF_MAX_RXD          4096
-#define IGBVF_MIN_RXD          80
+#define IGBVF_MIN_RXD          64
 
 #define IGBVF_MIN_ITR_USECS    10 /* 100000 irq/sec */
 #define IGBVF_MAX_ITR_USECS    10000 /* 100    irq/sec */
index 8ebe699..f48f82d 100644 (file)
@@ -379,11 +379,11 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
 /* TX/RX descriptor defines */
 #define IGC_DEFAULT_TXD                256
 #define IGC_DEFAULT_TX_WORK    128
-#define IGC_MIN_TXD            80
+#define IGC_MIN_TXD            64
 #define IGC_MAX_TXD            4096
 
 #define IGC_DEFAULT_RXD                256
-#define IGC_MIN_RXD            80
+#define IGC_MIN_RXD            64
 #define IGC_MAX_RXD            4096
 
 /* Supported Rx Buffer Sizes */
index c2f6867..23c2f2e 100644 (file)
@@ -846,6 +846,21 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
        return 0;
 }
 
+static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req,
+                              u16 *smq, u16 *smq_mask)
+{
+       struct nix_cn10k_aq_enq_req *aq_req;
+
+       if (!is_rvu_otx2(rvu)) {
+               aq_req = (struct nix_cn10k_aq_enq_req *)req;
+               *smq = aq_req->sq.smq;
+               *smq_mask = aq_req->sq_mask.smq;
+       } else {
+               *smq = req->sq.smq;
+               *smq_mask = req->sq_mask.smq;
+       }
+}
+
 static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
                                   struct nix_aq_enq_req *req,
                                   struct nix_aq_enq_rsp *rsp)
@@ -857,6 +872,7 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
        struct rvu_block *block;
        struct admin_queue *aq;
        struct rvu_pfvf *pfvf;
+       u16 smq, smq_mask;
        void *ctx, *mask;
        bool ena;
        u64 cfg;
@@ -928,13 +944,14 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
        if (rc)
                return rc;
 
+       nix_get_aq_req_smq(rvu, req, &smq, &smq_mask);
        /* Check if SQ pointed SMQ belongs to this PF/VF or not */
        if (req->ctype == NIX_AQ_CTYPE_SQ &&
            ((req->op == NIX_AQ_INSTOP_INIT && req->sq.ena) ||
             (req->op == NIX_AQ_INSTOP_WRITE &&
-             req->sq_mask.ena && req->sq_mask.smq && req->sq.ena))) {
+             req->sq_mask.ena && req->sq.ena && smq_mask))) {
                if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ,
-                                    pcifunc, req->sq.smq))
+                                    pcifunc, smq))
                        return NIX_AF_ERR_AQ_ENQUEUE;
        }
 
index 92d3952..feeb416 100644 (file)
@@ -17,8 +17,10 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
        if (err)
                return err;
 
-       if (mlx5e_is_eswitch_flow(parse_state->flow))
+       if (mlx5e_is_eswitch_flow(parse_state->flow)) {
                attr->esw_attr->split_count = attr->esw_attr->out_count;
+               parse_state->if_count = 0;
+       }
 
        attr->flags |= MLX5_ATTR_FLAG_CT;
 
index 291193f..f63402c 100644 (file)
@@ -294,6 +294,7 @@ parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
        if (err)
                return err;
 
+       parse_state->if_count = 0;
        esw_attr->out_count++;
        return 0;
 }
index 3b272bb..368a95f 100644 (file)
@@ -98,8 +98,10 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
 
        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 
-       if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+       if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
                esw_attr->split_count = esw_attr->out_count;
+               parse_state->if_count = 0;
+       }
 
        return 0;
 }
index ad09a8a..2d1d4a0 100644 (file)
@@ -66,6 +66,7 @@ tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
        if (err)
                return err;
 
+       parse_state->if_count = 0;
        esw_attr->out_count++;
 
        return 0;
index c8a3eaf..a13c5e7 100644 (file)
@@ -166,6 +166,7 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
                return err;
 
        esw_attr->split_count = esw_attr->out_count;
+       parse_state->if_count = 0;
 
        return 0;
 }
index 310b992..f17575b 100644 (file)
@@ -65,8 +65,10 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
        if (err)
                return err;
 
-       if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+       if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
                attr->esw_attr->split_count = attr->esw_attr->out_count;
+               parse_state->if_count = 0;
+       }
 
        return 0;
 }
index 3180836..c24828b 100644 (file)
@@ -3936,6 +3936,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
                        }
 
                        i_split = i + 1;
+                       parse_state->if_count = 0;
                        list_add(&attr->list, &flow->attrs);
                }
 
index 6cd7d64..d4cde65 100644 (file)
@@ -1276,12 +1276,19 @@ int
 mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
                                 enum mlx5_eswitch_vport_event enabled_events)
 {
+       bool pf_needed;
        int ret;
 
+       pf_needed = mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+                   esw->mode == MLX5_ESWITCH_LEGACY;
+
        /* Enable PF vport */
-       ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events);
-       if (ret)
-               return ret;
+       if (pf_needed) {
+               ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF,
+                                                   enabled_events);
+               if (ret)
+                       return ret;
+       }
 
        /* Enable external host PF HCA */
        ret = host_pf_enable_hca(esw->dev);
@@ -1317,7 +1324,8 @@ ec_vf_err:
 ecpf_err:
        host_pf_disable_hca(esw->dev);
 pf_hca_err:
-       mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+       if (pf_needed)
+               mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
        return ret;
 }
 
@@ -1335,7 +1343,10 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
        }
 
        host_pf_disable_hca(esw->dev);
-       mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+           esw->mode == MLX5_ESWITCH_LEGACY)
+               mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
 }
 
 static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw)
index 752fb0d..b296ac5 100644 (file)
@@ -3216,26 +3216,47 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
        esw_acl_ingress_ofld_cleanup(esw, vport);
 }
 
-static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
+static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
 {
-       struct mlx5_vport *vport;
+       struct mlx5_vport *uplink, *manager;
+       int ret;
 
-       vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
-       if (IS_ERR(vport))
-               return PTR_ERR(vport);
+       uplink = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+       if (IS_ERR(uplink))
+               return PTR_ERR(uplink);
+
+       ret = esw_vport_create_offloads_acl_tables(esw, uplink);
+       if (ret)
+               return ret;
+
+       manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (IS_ERR(manager)) {
+               ret = PTR_ERR(manager);
+               goto err_manager;
+       }
 
-       return esw_vport_create_offloads_acl_tables(esw, vport);
+       ret = esw_vport_create_offloads_acl_tables(esw, manager);
+       if (ret)
+               goto err_manager;
+
+       return 0;
+
+err_manager:
+       esw_vport_destroy_offloads_acl_tables(esw, uplink);
+       return ret;
 }
 
-static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
 {
        struct mlx5_vport *vport;
 
-       vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
-       if (IS_ERR(vport))
-               return;
+       vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (!IS_ERR(vport))
+               esw_vport_destroy_offloads_acl_tables(esw, vport);
 
-       esw_vport_destroy_offloads_acl_tables(esw, vport);
+       vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+       if (!IS_ERR(vport))
+               esw_vport_destroy_offloads_acl_tables(esw, vport);
 }
 
 int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
@@ -3280,7 +3301,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
        }
        esw->fdb_table.offloads.indir = indir;
 
-       err = esw_create_uplink_offloads_acl_tables(esw);
+       err = esw_create_offloads_acl_tables(esw);
        if (err)
                goto create_acl_err;
 
@@ -3321,7 +3342,7 @@ create_fdb_err:
 create_restore_err:
        esw_destroy_offloads_table(esw);
 create_offloads_err:
-       esw_destroy_uplink_offloads_acl_tables(esw);
+       esw_destroy_offloads_acl_tables(esw);
 create_acl_err:
        mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
 create_indir_err:
@@ -3337,7 +3358,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
        esw_destroy_offloads_fdb_tables(esw);
        esw_destroy_restore_table(esw);
        esw_destroy_offloads_table(esw);
-       esw_destroy_uplink_offloads_acl_tables(esw);
+       esw_destroy_offloads_acl_tables(esw);
        mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
        mutex_destroy(&esw->fdb_table.offloads.vports.lock);
 }
index 2375cef..f77a2d3 100644 (file)
@@ -359,26 +359,36 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
 /* Handle a received packet.  Second half: Touches packet payload. */
 void __efx_rx_packet(struct efx_channel *channel)
 {
+       struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
        struct efx_nic *efx = channel->efx;
        struct efx_rx_buffer *rx_buf =
-               efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
+               efx_rx_buffer(rx_queue, channel->rx_pkt_index);
        u8 *eh = efx_rx_buf_va(rx_buf);
 
        /* Read length from the prefix if necessary.  This already
         * excludes the length of the prefix itself.
         */
-       if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN)
+       if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN) {
                rx_buf->len = le16_to_cpup((__le16 *)
                                           (eh + efx->rx_packet_len_offset));
+               /* A known issue may prevent this being filled in;
+                * if that happens, just drop the packet.
+                * Must do that in the driver since passing a zero-length
+                * packet up to the stack may cause a crash.
+                */
+               if (unlikely(!rx_buf->len)) {
+                       efx_free_rx_buffers(rx_queue, rx_buf,
+                                           channel->rx_pkt_n_frags);
+                       channel->n_rx_frm_trunc++;
+                       goto out;
+               }
+       }
 
        /* If we're in loopback test, then pass the packet directly to the
         * loopback layer, and free the rx_buf here
         */
        if (unlikely(efx->loopback_selftest)) {
-               struct efx_rx_queue *rx_queue;
-
                efx_loopback_rx_packet(efx, eh, rx_buf->len);
-               rx_queue = efx_channel_get_rx_queue(channel);
                efx_free_rx_buffers(rx_queue, rx_buf,
                                    channel->rx_pkt_n_frags);
                goto out;
index 35f4b14..0f28795 100644 (file)
@@ -419,9 +419,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
                return ERR_PTR(phy_mode);
 
        plat->phy_interface = phy_mode;
-       plat->mac_interface = stmmac_of_get_mac_mode(np);
-       if (plat->mac_interface < 0)
-               plat->mac_interface = plat->phy_interface;
+       rc = stmmac_of_get_mac_mode(np);
+       plat->mac_interface = rc < 0 ? plat->phy_interface : rc;
 
        /* Some wrapper drivers still rely on phy_node. Let's save it while
         * they are not converted to phylink. */
index c3f3066..b7e1514 100644 (file)
@@ -1330,8 +1330,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len)
        struct crypto_aead *tfm;
        int ret;
 
-       /* Pick a sync gcm(aes) cipher to ensure order is preserved. */
-       tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+       tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
 
        if (IS_ERR(tfm))
                return tfm;
index b6d7981..927d3d5 100644 (file)
@@ -1800,9 +1800,6 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = {
        /* Transmit waveform amplitude can be improved (1000BASE-T, 100BASE-TX, 10BASE-Te) */
        {0x1c, 0x04, 0x00d0},
 
-       /* Energy Efficient Ethernet (EEE) feature select must be manually disabled */
-       {0x07, 0x3c, 0x0000},
-
        /* Register settings are required to meet data sheet supply current specifications */
        {0x1c, 0x13, 0x6eff},
        {0x1c, 0x14, 0xe6ff},
@@ -1847,6 +1844,12 @@ static int ksz9477_config_init(struct phy_device *phydev)
                        return err;
        }
 
+       /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes
+        * in this switch shall be regarded as broken.
+        */
+       if (phydev->dev_flags & MICREL_NO_EEE)
+               phydev->eee_broken_modes = -1;
+
        err = genphy_restart_aneg(phydev);
        if (err)
                return err;
index d43e62e..9c6f4f8 100644 (file)
@@ -344,6 +344,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
        struct veth_rq *rq = NULL;
+       int ret = NETDEV_TX_OK;
        struct net_device *rcv;
        int length = skb->len;
        bool use_napi = false;
@@ -378,11 +379,12 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
        } else {
 drop:
                atomic64_inc(&priv->dropped);
+               ret = NET_XMIT_DROP;
        }
 
        rcu_read_unlock();
 
-       return NETDEV_TX_OK;
+       return ret;
 }
 
 static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
index dca25a0..3ae4b41 100644 (file)
@@ -336,6 +336,7 @@ MODULE_DEVICE_TABLE(of, of_nxp_nci_i2c_match);
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id acpi_id[] = {
        { "NXP1001" },
+       { "NXP1002" },
        { "NXP7471" },
        { }
 };
index e5a2ac4..8fcaa26 100644 (file)
@@ -749,6 +749,8 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu)
 
        /* Enable all counters */
        armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
+
+       kvm_vcpu_pmu_resync_el0();
 }
 
 static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
index 0a8f597..365d964 100644 (file)
@@ -25,7 +25,7 @@
 #include "../cxl/pmu.h"
 
 #define CXL_PMU_CAP_REG                        0x0
-#define   CXL_PMU_CAP_NUM_COUNTERS_MSK                 GENMASK_ULL(4, 0)
+#define   CXL_PMU_CAP_NUM_COUNTERS_MSK                 GENMASK_ULL(5, 0)
 #define   CXL_PMU_CAP_COUNTER_WIDTH_MSK                        GENMASK_ULL(15, 8)
 #define   CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK          GENMASK_ULL(24, 20)
 #define   CXL_PMU_CAP_FILTERS_SUP_MSK                  GENMASK_ULL(39, 32)
index 5c2e6d5..40a2cc6 100644 (file)
@@ -658,8 +658,6 @@ static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
                            RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
        [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
                            RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
-       [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
-                               RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
        [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
                            RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
        [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
@@ -1458,7 +1456,7 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
                        }
                }
 
-               if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64))
+               if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64))
                        rd->rpl[i].name = NULL;
        }
 }
index 3daccea..dc66e34 100644 (file)
@@ -8,8 +8,8 @@
 
 #include <linux/acpi.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/pwm.h>
-#include <linux/radix-tree.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/err.h>
@@ -127,28 +127,28 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
 }
 
 struct pwm_device *
-of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args)
+of_pwm_xlate_with_flags(struct pwm_chip *chip, const struct of_phandle_args *args)
 {
        struct pwm_device *pwm;
 
-       if (pc->of_pwm_n_cells < 2)
+       if (chip->of_pwm_n_cells < 2)
                return ERR_PTR(-EINVAL);
 
        /* flags in the third cell are optional */
        if (args->args_count < 2)
                return ERR_PTR(-EINVAL);
 
-       if (args->args[0] >= pc->npwm)
+       if (args->args[0] >= chip->npwm)
                return ERR_PTR(-EINVAL);
 
-       pwm = pwm_request_from_chip(pc, args->args[0], NULL);
+       pwm = pwm_request_from_chip(chip, args->args[0], NULL);
        if (IS_ERR(pwm))
                return pwm;
 
        pwm->args.period = args->args[1];
        pwm->args.polarity = PWM_POLARITY_NORMAL;
 
-       if (pc->of_pwm_n_cells >= 3) {
+       if (chip->of_pwm_n_cells >= 3) {
                if (args->args_count > 2 && args->args[2] & PWM_POLARITY_INVERTED)
                        pwm->args.polarity = PWM_POLARITY_INVERSED;
        }
@@ -158,18 +158,18 @@ of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args)
 EXPORT_SYMBOL_GPL(of_pwm_xlate_with_flags);
 
 struct pwm_device *
-of_pwm_single_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
+of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
 {
        struct pwm_device *pwm;
 
-       if (pc->of_pwm_n_cells < 1)
+       if (chip->of_pwm_n_cells < 1)
                return ERR_PTR(-EINVAL);
 
        /* validate that one cell is specified, optionally with flags */
        if (args->args_count != 1 && args->args_count != 2)
                return ERR_PTR(-EINVAL);
 
-       pwm = pwm_request_from_chip(pc, 0, NULL);
+       pwm = pwm_request_from_chip(chip, 0, NULL);
        if (IS_ERR(pwm))
                return pwm;
 
@@ -312,22 +312,19 @@ EXPORT_SYMBOL_GPL(pwmchip_add);
  * pwmchip_remove() - remove a PWM chip
  * @chip: the PWM chip to remove
  *
- * Removes a PWM chip. This function may return busy if the PWM chip provides
- * a PWM device that is still requested.
- *
- * Returns: 0 on success or a negative error code on failure.
+ * Removes a PWM chip.
  */
 void pwmchip_remove(struct pwm_chip *chip)
 {
        pwmchip_sysfs_unexport(chip);
 
+       if (IS_ENABLED(CONFIG_OF))
+               of_pwmchip_remove(chip);
+
        mutex_lock(&pwm_lock);
 
        list_del_init(&chip->list);
 
-       if (IS_ENABLED(CONFIG_OF))
-               of_pwmchip_remove(chip);
-
        free_pwms(chip);
 
        mutex_unlock(&pwm_lock);
@@ -692,7 +689,7 @@ static struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np,
        struct pwm_device *pwm = NULL;
        struct of_phandle_args args;
        struct device_link *dl;
-       struct pwm_chip *pc;
+       struct pwm_chip *chip;
        int index = 0;
        int err;
 
@@ -709,16 +706,16 @@ static struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np,
                return ERR_PTR(err);
        }
 
-       pc = fwnode_to_pwmchip(of_fwnode_handle(args.np));
-       if (IS_ERR(pc)) {
-               if (PTR_ERR(pc) != -EPROBE_DEFER)
+       chip = fwnode_to_pwmchip(of_fwnode_handle(args.np));
+       if (IS_ERR(chip)) {
+               if (PTR_ERR(chip) != -EPROBE_DEFER)
                        pr_err("%s(): PWM chip not found\n", __func__);
 
-               pwm = ERR_CAST(pc);
+               pwm = ERR_CAST(chip);
                goto put;
        }
 
-       pwm = pc->of_xlate(pc, &args);
+       pwm = chip->of_xlate(chip, &args);
        if (IS_ERR(pwm))
                goto put;
 
index a38a62e..8e7d67f 100644 (file)
@@ -12,6 +12,7 @@
  * - When APPLE_PWM_CTRL is set to 0, the output is constant low
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
index 96a709a..e271d92 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/delay.h>
 #include <linux/mfd/atmel-hlcdc.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/regmap.h>
@@ -38,11 +39,11 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip)
        return container_of(chip, struct atmel_hlcdc_pwm, chip);
 }
 
-static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
+static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                                 const struct pwm_state *state)
 {
-       struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
-       struct atmel_hlcdc *hlcdc = chip->hlcdc;
+       struct atmel_hlcdc_pwm *atmel = to_atmel_hlcdc_pwm(chip);
+       struct atmel_hlcdc *hlcdc = atmel->hlcdc;
        unsigned int status;
        int ret;
 
@@ -54,7 +55,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
                u32 pwmcfg;
                int pres;
 
-               if (!chip->errata || !chip->errata->slow_clk_erratum) {
+               if (!atmel->errata || !atmel->errata->slow_clk_erratum) {
                        clk_freq = clk_get_rate(new_clk);
                        if (!clk_freq)
                                return -EINVAL;
@@ -64,7 +65,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
                }
 
                /* Errata: cannot use slow clk on some IP revisions */
-               if ((chip->errata && chip->errata->slow_clk_erratum) ||
+               if ((atmel->errata && atmel->errata->slow_clk_erratum) ||
                    clk_period_ns > state->period) {
                        new_clk = hlcdc->sys_clk;
                        clk_freq = clk_get_rate(new_clk);
@@ -77,8 +78,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
 
                for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) {
                /* Errata: cannot divide by 1 on some IP revisions */
-                       if (!pres && chip->errata &&
-                           chip->errata->div1_clk_erratum)
+                       if (!pres && atmel->errata &&
+                           atmel->errata->div1_clk_erratum)
                                continue;
 
                        if ((clk_period_ns << pres) >= state->period)
@@ -90,7 +91,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
 
                pwmcfg = ATMEL_HLCDC_PWMPS(pres);
 
-               if (new_clk != chip->cur_clk) {
+               if (new_clk != atmel->cur_clk) {
                        u32 gencfg = 0;
                        int ret;
 
@@ -98,8 +99,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
                        if (ret)
                                return ret;
 
-                       clk_disable_unprepare(chip->cur_clk);
-                       chip->cur_clk = new_clk;
+                       clk_disable_unprepare(atmel->cur_clk);
+                       atmel->cur_clk = new_clk;
 
                        if (new_clk == hlcdc->sys_clk)
                                gencfg = ATMEL_HLCDC_CLKPWMSEL;
@@ -160,8 +161,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
                if (ret)
                        return ret;
 
-               clk_disable_unprepare(chip->cur_clk);
-               chip->cur_clk = NULL;
+               clk_disable_unprepare(atmel->cur_clk);
+               atmel->cur_clk = NULL;
        }
 
        return 0;
@@ -183,31 +184,32 @@ static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_sama5d3_errata = {
 #ifdef CONFIG_PM_SLEEP
 static int atmel_hlcdc_pwm_suspend(struct device *dev)
 {
-       struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev);
+       struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev);
 
        /* Keep the periph clock enabled if the PWM is still running. */
-       if (pwm_is_enabled(&chip->chip.pwms[0]))
-               clk_disable_unprepare(chip->hlcdc->periph_clk);
+       if (pwm_is_enabled(&atmel->chip.pwms[0]))
+               clk_disable_unprepare(atmel->hlcdc->periph_clk);
 
        return 0;
 }
 
 static int atmel_hlcdc_pwm_resume(struct device *dev)
 {
-       struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev);
+       struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev);
        struct pwm_state state;
        int ret;
 
-       pwm_get_state(&chip->chip.pwms[0], &state);
+       pwm_get_state(&atmel->chip.pwms[0], &state);
 
        /* Re-enable the periph clock it was stopped during suspend. */
        if (!state.enabled) {
-               ret = clk_prepare_enable(chip->hlcdc->periph_clk);
+               ret = clk_prepare_enable(atmel->hlcdc->periph_clk);
                if (ret)
                        return ret;
        }
 
-       return atmel_hlcdc_pwm_apply(&chip->chip, &chip->chip.pwms[0], &state);
+       return atmel_hlcdc_pwm_apply(&atmel->chip, &atmel->chip.pwms[0],
+                                    &state);
 }
 #endif
 
@@ -244,14 +246,14 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev)
 {
        const struct of_device_id *match;
        struct device *dev = &pdev->dev;
-       struct atmel_hlcdc_pwm *chip;
+       struct atmel_hlcdc_pwm *atmel;
        struct atmel_hlcdc *hlcdc;
        int ret;
 
        hlcdc = dev_get_drvdata(dev->parent);
 
-       chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
-       if (!chip)
+       atmel = devm_kzalloc(dev, sizeof(*atmel), GFP_KERNEL);
+       if (!atmel)
                return -ENOMEM;
 
        ret = clk_prepare_enable(hlcdc->periph_clk);
@@ -260,31 +262,31 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev)
 
        match = of_match_node(atmel_hlcdc_dt_ids, dev->parent->of_node);
        if (match)
-               chip->errata = match->data;
+               atmel->errata = match->data;
 
-       chip->hlcdc = hlcdc;
-       chip->chip.ops = &atmel_hlcdc_pwm_ops;
-       chip->chip.dev = dev;
-       chip->chip.npwm = 1;
+       atmel->hlcdc = hlcdc;
+       atmel->chip.ops = &atmel_hlcdc_pwm_ops;
+       atmel->chip.dev = dev;
+       atmel->chip.npwm = 1;
 
-       ret = pwmchip_add(&chip->chip);
+       ret = pwmchip_add(&atmel->chip);
        if (ret) {
                clk_disable_unprepare(hlcdc->periph_clk);
                return ret;
        }
 
-       platform_set_drvdata(pdev, chip);
+       platform_set_drvdata(pdev, atmel);
 
        return 0;
 }
 
 static void atmel_hlcdc_pwm_remove(struct platform_device *pdev)
 {
-       struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev);
+       struct atmel_hlcdc_pwm *atmel = platform_get_drvdata(pdev);
 
-       pwmchip_remove(&chip->chip);
+       pwmchip_remove(&atmel->chip);
 
-       clk_disable_unprepare(chip->hlcdc->periph_clk);
+       clk_disable_unprepare(atmel->hlcdc->periph_clk);
 }
 
 static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = {
index 4a116dc..c00dd37 100644 (file)
@@ -19,8 +19,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 #include <soc/at91/atmel_tcb.h>
@@ -34,7 +33,6 @@
                                 ATMEL_TC_BEEVT | ATMEL_TC_BSWTRG)
 
 struct atmel_tcb_pwm_device {
-       enum pwm_polarity polarity;     /* PWM polarity */
        unsigned div;                   /* PWM clock divider */
        unsigned duty;                  /* PWM duty expressed in clk cycles */
        unsigned period;                /* PWM period expressed in clk cycles */
@@ -57,7 +55,7 @@ struct atmel_tcb_pwm_chip {
        struct clk *clk;
        struct clk *gclk;
        struct clk *slow_clk;
-       struct atmel_tcb_pwm_device *pwms[NPWM];
+       struct atmel_tcb_pwm_device pwms[NPWM];
        struct atmel_tcb_channel bkup;
 };
 
@@ -68,37 +66,18 @@ static inline struct atmel_tcb_pwm_chip *to_tcb_chip(struct pwm_chip *chip)
        return container_of(chip, struct atmel_tcb_pwm_chip, chip);
 }
 
-static int atmel_tcb_pwm_set_polarity(struct pwm_chip *chip,
-                                     struct pwm_device *pwm,
-                                     enum pwm_polarity polarity)
-{
-       struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
-
-       tcbpwm->polarity = polarity;
-
-       return 0;
-}
-
 static int atmel_tcb_pwm_request(struct pwm_chip *chip,
                                 struct pwm_device *pwm)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm;
+       struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
        unsigned cmr;
        int ret;
 
-       tcbpwm = devm_kzalloc(chip->dev, sizeof(*tcbpwm), GFP_KERNEL);
-       if (!tcbpwm)
-               return -ENOMEM;
-
        ret = clk_prepare_enable(tcbpwmc->clk);
-       if (ret) {
-               devm_kfree(chip->dev, tcbpwm);
+       if (ret)
                return ret;
-       }
 
-       tcbpwm->polarity = PWM_POLARITY_NORMAL;
        tcbpwm->duty = 0;
        tcbpwm->period = 0;
        tcbpwm->div = 0;
@@ -131,27 +110,22 @@ static int atmel_tcb_pwm_request(struct pwm_chip *chip,
        regmap_write(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CMR), cmr);
        spin_unlock(&tcbpwmc->lock);
 
-       tcbpwmc->pwms[pwm->hwpwm] = tcbpwm;
-
        return 0;
 }
 
 static void atmel_tcb_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
 
        clk_disable_unprepare(tcbpwmc->clk);
-       tcbpwmc->pwms[pwm->hwpwm] = NULL;
-       devm_kfree(chip->dev, tcbpwm);
 }
 
-static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm,
+                                 enum pwm_polarity polarity)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
+       struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
        unsigned cmr;
-       enum pwm_polarity polarity = tcbpwm->polarity;
 
        /*
         * If duty is 0 the timer will be stopped and we have to
@@ -203,12 +177,12 @@ static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
        spin_unlock(&tcbpwmc->lock);
 }
 
-static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
+                               enum pwm_polarity polarity)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
+       struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
        u32 cmr;
-       enum pwm_polarity polarity = tcbpwm->polarity;
 
        /*
         * If duty is 0 the timer will be stopped and we have to
@@ -291,7 +265,7 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
                                int duty_ns, int period_ns)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
+       struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
        struct atmel_tcb_pwm_device *atcbpwm = NULL;
        int i = 0;
        int slowclk = 0;
@@ -338,9 +312,9 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        period = div_u64(period_ns, min);
 
        if (pwm->hwpwm == 0)
-               atcbpwm = tcbpwmc->pwms[1];
+               atcbpwm = &tcbpwmc->pwms[1];
        else
-               atcbpwm = tcbpwmc->pwms[0];
+               atcbpwm = &tcbpwmc->pwms[0];
 
        /*
         * PWM devices provided by the TCB driver are grouped by 2.
@@ -371,11 +345,8 @@ static int atmel_tcb_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        int duty_cycle, period;
        int ret;
 
-       /* This function only sets a flag in driver data */
-       atmel_tcb_pwm_set_polarity(chip, pwm, state->polarity);
-
        if (!state->enabled) {
-               atmel_tcb_pwm_disable(chip, pwm);
+               atmel_tcb_pwm_disable(chip, pwm, state->polarity);
                return 0;
        }
 
@@ -386,7 +357,7 @@ static int atmel_tcb_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        if (ret)
                return ret;
 
-       return atmel_tcb_pwm_enable(chip, pwm);
+       return atmel_tcb_pwm_enable(chip, pwm, state->polarity);
 }
 
 static const struct pwm_ops atmel_tcb_pwm_ops = {
@@ -422,13 +393,14 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
        struct atmel_tcb_pwm_chip *tcbpwm;
        const struct atmel_tcb_config *config;
        struct device_node *np = pdev->dev.of_node;
-       struct regmap *regmap;
-       struct clk *clk, *gclk = NULL;
-       struct clk *slow_clk;
        char clk_name[] = "t0_clk";
        int err;
        int channel;
 
+       tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
+       if (tcbpwm == NULL)
+               return -ENOMEM;
+
        err = of_property_read_u32(np, "reg", &channel);
        if (err < 0) {
                dev_err(&pdev->dev,
@@ -437,49 +409,43 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
                return err;
        }
 
-       regmap = syscon_node_to_regmap(np->parent);
-       if (IS_ERR(regmap))
-               return PTR_ERR(regmap);
+       tcbpwm->regmap = syscon_node_to_regmap(np->parent);
+       if (IS_ERR(tcbpwm->regmap))
+               return PTR_ERR(tcbpwm->regmap);
 
-       slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
-       if (IS_ERR(slow_clk))
-               return PTR_ERR(slow_clk);
+       tcbpwm->slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
+       if (IS_ERR(tcbpwm->slow_clk))
+               return PTR_ERR(tcbpwm->slow_clk);
 
        clk_name[1] += channel;
-       clk = of_clk_get_by_name(np->parent, clk_name);
-       if (IS_ERR(clk))
-               clk = of_clk_get_by_name(np->parent, "t0_clk");
-       if (IS_ERR(clk))
-               return PTR_ERR(clk);
+       tcbpwm->clk = of_clk_get_by_name(np->parent, clk_name);
+       if (IS_ERR(tcbpwm->clk))
+               tcbpwm->clk = of_clk_get_by_name(np->parent, "t0_clk");
+       if (IS_ERR(tcbpwm->clk)) {
+               err = PTR_ERR(tcbpwm->clk);
+               goto err_slow_clk;
+       }
 
        match = of_match_node(atmel_tcb_of_match, np->parent);
        config = match->data;
 
        if (config->has_gclk) {
-               gclk = of_clk_get_by_name(np->parent, "gclk");
-               if (IS_ERR(gclk))
-                       return PTR_ERR(gclk);
-       }
-
-       tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
-       if (tcbpwm == NULL) {
-               err = -ENOMEM;
-               goto err_slow_clk;
+               tcbpwm->gclk = of_clk_get_by_name(np->parent, "gclk");
+               if (IS_ERR(tcbpwm->gclk)) {
+                       err = PTR_ERR(tcbpwm->gclk);
+                       goto err_clk;
+               }
        }
 
        tcbpwm->chip.dev = &pdev->dev;
        tcbpwm->chip.ops = &atmel_tcb_pwm_ops;
        tcbpwm->chip.npwm = NPWM;
        tcbpwm->channel = channel;
-       tcbpwm->regmap = regmap;
-       tcbpwm->clk = clk;
-       tcbpwm->gclk = gclk;
-       tcbpwm->slow_clk = slow_clk;
        tcbpwm->width = config->counter_width;
 
-       err = clk_prepare_enable(slow_clk);
+       err = clk_prepare_enable(tcbpwm->slow_clk);
        if (err)
-               goto err_slow_clk;
+               goto err_gclk;
 
        spin_lock_init(&tcbpwm->lock);
 
@@ -494,8 +460,14 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
 err_disable_clk:
        clk_disable_unprepare(tcbpwm->slow_clk);
 
+err_gclk:
+       clk_put(tcbpwm->gclk);
+
+err_clk:
+       clk_put(tcbpwm->clk);
+
 err_slow_clk:
-       clk_put(slow_clk);
+       clk_put(tcbpwm->slow_clk);
 
        return err;
 }
@@ -507,8 +479,9 @@ static void atmel_tcb_pwm_remove(struct platform_device *pdev)
        pwmchip_remove(&tcbpwm->chip);
 
        clk_disable_unprepare(tcbpwm->slow_clk);
-       clk_put(tcbpwm->slow_clk);
+       clk_put(tcbpwm->gclk);
        clk_put(tcbpwm->clk);
+       clk_put(tcbpwm->slow_clk);
 }
 
 static const struct of_device_id atmel_tcb_pwm_dt_ids[] = {
index 5f7d286..1f73325 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
@@ -36,7 +35,7 @@
 #define PWM_SR                 0x0C
 #define PWM_ISR                        0x1C
 /* Bit field in SR */
-#define PWM_SR_ALL_CH_ON       0x0F
+#define PWM_SR_ALL_CH_MASK     0x0F
 
 /* The following register is PWM channel related registers */
 #define PWM_CH_REG_OFFSET      0x200
@@ -464,6 +463,42 @@ static const struct of_device_id atmel_pwm_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids);
 
+static int atmel_pwm_enable_clk_if_on(struct atmel_pwm_chip *atmel_pwm, bool on)
+{
+       unsigned int i, cnt = 0;
+       unsigned long sr;
+       int ret = 0;
+
+       sr = atmel_pwm_readl(atmel_pwm, PWM_SR) & PWM_SR_ALL_CH_MASK;
+       if (!sr)
+               return 0;
+
+       cnt = bitmap_weight(&sr, atmel_pwm->chip.npwm);
+
+       if (!on)
+               goto disable_clk;
+
+       for (i = 0; i < cnt; i++) {
+               ret = clk_enable(atmel_pwm->clk);
+               if (ret) {
+                       dev_err(atmel_pwm->chip.dev,
+                               "failed to enable clock for pwm %pe\n",
+                               ERR_PTR(ret));
+
+                       cnt = i;
+                       goto disable_clk;
+               }
+       }
+
+       return 0;
+
+disable_clk:
+       while (cnt--)
+               clk_disable(atmel_pwm->clk);
+
+       return ret;
+}
+
 static int atmel_pwm_probe(struct platform_device *pdev)
 {
        struct atmel_pwm_chip *atmel_pwm;
@@ -482,51 +517,39 @@ static int atmel_pwm_probe(struct platform_device *pdev)
        if (IS_ERR(atmel_pwm->base))
                return PTR_ERR(atmel_pwm->base);
 
-       atmel_pwm->clk = devm_clk_get(&pdev->dev, NULL);
+       atmel_pwm->clk = devm_clk_get_prepared(&pdev->dev, NULL);
        if (IS_ERR(atmel_pwm->clk))
-               return PTR_ERR(atmel_pwm->clk);
-
-       ret = clk_prepare(atmel_pwm->clk);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to prepare PWM clock\n");
-               return ret;
-       }
+               return dev_err_probe(&pdev->dev, PTR_ERR(atmel_pwm->clk),
+                                    "failed to get prepared PWM clock\n");
 
        atmel_pwm->chip.dev = &pdev->dev;
        atmel_pwm->chip.ops = &atmel_pwm_ops;
        atmel_pwm->chip.npwm = 4;
 
-       ret = pwmchip_add(&atmel_pwm->chip);
+       ret = atmel_pwm_enable_clk_if_on(atmel_pwm, true);
+       if (ret < 0)
+               return ret;
+
+       ret = devm_pwmchip_add(&pdev->dev, &atmel_pwm->chip);
        if (ret < 0) {
-               dev_err(&pdev->dev, "failed to add PWM chip %d\n", ret);
-               goto unprepare_clk;
+               dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
+               goto disable_clk;
        }
 
-       platform_set_drvdata(pdev, atmel_pwm);
+       return 0;
 
-       return ret;
+disable_clk:
+       atmel_pwm_enable_clk_if_on(atmel_pwm, false);
 
-unprepare_clk:
-       clk_unprepare(atmel_pwm->clk);
        return ret;
 }
 
-static void atmel_pwm_remove(struct platform_device *pdev)
-{
-       struct atmel_pwm_chip *atmel_pwm = platform_get_drvdata(pdev);
-
-       pwmchip_remove(&atmel_pwm->chip);
-
-       clk_unprepare(atmel_pwm->clk);
-}
-
 static struct platform_driver atmel_pwm_driver = {
        .driver = {
                .name = "atmel-pwm",
                .of_match_table = of_match_ptr(atmel_pwm_dt_ids),
        },
        .probe = atmel_pwm_probe,
-       .remove_new = atmel_pwm_remove,
 };
 module_platform_driver(atmel_pwm_driver);
 
index 4fa6e24..e5b00cc 100644 (file)
@@ -61,9 +61,9 @@ struct kona_pwmc {
        struct clk *clk;
 };
 
-static inline struct kona_pwmc *to_kona_pwmc(struct pwm_chip *_chip)
+static inline struct kona_pwmc *to_kona_pwmc(struct pwm_chip *chip)
 {
-       return container_of(_chip, struct kona_pwmc, chip);
+       return container_of(chip, struct kona_pwmc, chip);
 }
 
 /*
index 0c5992a..0971c66 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
index 4703b4a..b9f063d 100644 (file)
@@ -34,9 +34,9 @@ struct crystalcove_pwm {
        struct regmap *regmap;
 };
 
-static inline struct crystalcove_pwm *to_crc_pwm(struct pwm_chip *pc)
+static inline struct crystalcove_pwm *to_crc_pwm(struct pwm_chip *chip)
 {
-       return container_of(pc, struct crystalcove_pwm, chip);
+       return container_of(chip, struct crystalcove_pwm, chip);
 }
 
 static int crc_pwm_calc_clk_div(int period_ns)
index 74e863a..baaac0c 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_data/cros_ec_commands.h>
 #include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
@@ -37,9 +38,9 @@ struct cros_ec_pwm {
        u16 duty_cycle;
 };
 
-static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *c)
+static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *chip)
 {
-       return container_of(c, struct cros_ec_pwm_device, chip);
+       return container_of(chip, struct cros_ec_pwm_device, chip);
 }
 
 static int cros_ec_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
@@ -218,14 +219,14 @@ static int cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static struct pwm_device *
-cros_ec_pwm_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
+cros_ec_pwm_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
 {
        struct pwm_device *pwm;
 
-       if (args->args[0] >= pc->npwm)
+       if (args->args[0] >= chip->npwm)
                return ERR_PTR(-EINVAL);
 
-       pwm = pwm_request_from_chip(pc, args->args[0], NULL);
+       pwm = pwm_request_from_chip(chip, args->args[0], NULL);
        if (IS_ERR(pwm))
                return pwm;
 
index 5caadbd..b7c6045 100644 (file)
@@ -11,8 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/pwm.h>
index b95df1a..f7ba6fe 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/reset.h>
index 1f2eb1c..0651983 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
index 3b7067f..ef1293f 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/mfd/ingenic-tcu.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/regmap.h>
index 35675e4..4b133a1 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/err.h>
 #include <linux/mfd/lp3943.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
@@ -24,9 +25,9 @@ struct lp3943_pwm {
        struct lp3943_platform_data *pdata;
 };
 
-static inline struct lp3943_pwm *to_lp3943_pwm(struct pwm_chip *_chip)
+static inline struct lp3943_pwm *to_lp3943_pwm(struct pwm_chip *chip)
 {
-       return container_of(_chip, struct lp3943_pwm, chip);
+       return container_of(chip, struct lp3943_pwm, chip);
 }
 
 static struct lp3943_pwm_map *
index b9bf5b3..7a19a84 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
@@ -366,30 +367,21 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
        if (IS_ERR(lpc18xx_pwm->base))
                return PTR_ERR(lpc18xx_pwm->base);
 
-       lpc18xx_pwm->pwm_clk = devm_clk_get(&pdev->dev, "pwm");
+       lpc18xx_pwm->pwm_clk = devm_clk_get_enabled(&pdev->dev, "pwm");
        if (IS_ERR(lpc18xx_pwm->pwm_clk))
                return dev_err_probe(&pdev->dev, PTR_ERR(lpc18xx_pwm->pwm_clk),
                                     "failed to get pwm clock\n");
 
-       ret = clk_prepare_enable(lpc18xx_pwm->pwm_clk);
-       if (ret < 0)
-               return dev_err_probe(&pdev->dev, ret,
-                                    "could not prepare or enable pwm clock\n");
-
        lpc18xx_pwm->clk_rate = clk_get_rate(lpc18xx_pwm->pwm_clk);
-       if (!lpc18xx_pwm->clk_rate) {
-               ret = dev_err_probe(&pdev->dev,
-                                   -EINVAL, "pwm clock has no frequency\n");
-               goto disable_pwmclk;
-       }
+       if (!lpc18xx_pwm->clk_rate)
+               return dev_err_probe(&pdev->dev,
+                                    -EINVAL, "pwm clock has no frequency\n");
 
        /*
         * If clkrate is too fast, the calculations in .apply() might overflow.
         */
-       if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC) {
-               ret = dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n");
-               goto disable_pwmclk;
-       }
+       if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC)
+               return dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n");
 
        mutex_init(&lpc18xx_pwm->res_lock);
        mutex_init(&lpc18xx_pwm->period_lock);
@@ -435,18 +427,12 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
        lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL, val);
 
        ret = pwmchip_add(&lpc18xx_pwm->chip);
-       if (ret < 0) {
-               dev_err_probe(&pdev->dev, ret, "pwmchip_add failed\n");
-               goto disable_pwmclk;
-       }
+       if (ret < 0)
+               return dev_err_probe(&pdev->dev, ret, "pwmchip_add failed\n");
 
        platform_set_drvdata(pdev, lpc18xx_pwm);
 
        return 0;
-
-disable_pwmclk:
-       clk_disable_unprepare(lpc18xx_pwm->pwm_clk);
-       return ret;
 }
 
 static void lpc18xx_pwm_remove(struct platform_device *pdev)
@@ -459,8 +445,6 @@ static void lpc18xx_pwm_remove(struct platform_device *pdev)
        val = lpc18xx_pwm_readl(lpc18xx_pwm, LPC18XX_PWM_CTRL);
        lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL,
                           val | LPC18XX_PWM_CTRL_HALT);
-
-       clk_disable_unprepare(lpc18xx_pwm->pwm_clk);
 }
 
 static struct platform_driver lpc18xx_pwm_driver = {
index 86a0ea0..806f0bb 100644 (file)
@@ -51,10 +51,10 @@ static int lpc32xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        if (duty_cycles > 255)
                duty_cycles = 255;
 
-       val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+       val = readl(lpc32xx->base);
        val &= ~0xFFFF;
        val |= (period_cycles << 8) | duty_cycles;
-       writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+       writel(val, lpc32xx->base);
 
        return 0;
 }
@@ -69,9 +69,9 @@ static int lpc32xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
        if (ret)
                return ret;
 
-       val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+       val = readl(lpc32xx->base);
        val |= PWM_ENABLE;
-       writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+       writel(val, lpc32xx->base);
 
        return 0;
 }
@@ -81,9 +81,9 @@ static void lpc32xx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
        struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip);
        u32 val;
 
-       val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+       val = readl(lpc32xx->base);
        val &= ~PWM_ENABLE;
-       writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+       writel(val, lpc32xx->base);
 
        clk_disable_unprepare(lpc32xx->clk);
 }
@@ -141,9 +141,9 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev)
        lpc32xx->chip.npwm = 1;
 
        /* If PWM is disabled, configure the output to the default value */
-       val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
+       val = readl(lpc32xx->base);
        val &= ~PWM_PIN_LEVEL;
-       writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
+       writel(val, lpc32xx->base);
 
        ret = devm_pwmchip_add(&pdev->dev, &lpc32xx->chip);
        if (ret < 0) {
index 7a51d21..6adb0ed 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/clk.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
index 22f54db..25519cd 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
index 8750b57..e7525c9 100644 (file)
@@ -37,7 +37,7 @@
 #include <linux/math.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 
index 2401b67..a83bd6e 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
index ab63b08..7514ea3 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/types.h>
 
 struct ntxec_pwm {
-       struct device *dev;
        struct ntxec *ec;
        struct pwm_chip chip;
 };
@@ -141,14 +140,13 @@ static int ntxec_pwm_probe(struct platform_device *pdev)
        struct ntxec_pwm *priv;
        struct pwm_chip *chip;
 
-       pdev->dev.of_node = pdev->dev.parent->of_node;
+       device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
 
        priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
 
        priv->ec = ec;
-       priv->dev = &pdev->dev;
 
        chip = &priv->chip;
        chip->dev = &pdev->dev;
index 762429d..1e475ed 100644 (file)
@@ -15,6 +15,7 @@
  *   input clock (PWMCR_SD is set) and the output is driven to inactive.
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
@@ -156,13 +157,6 @@ MODULE_DEVICE_TABLE(of, pwm_of_match);
 #define pwm_of_match NULL
 #endif
 
-static const struct platform_device_id *pxa_pwm_get_id_dt(struct device *dev)
-{
-       const struct of_device_id *id = of_match_device(pwm_of_match, dev);
-
-       return id ? id->data : NULL;
-}
-
 static int pwm_probe(struct platform_device *pdev)
 {
        const struct platform_device_id *id = platform_get_device_id(pdev);
@@ -170,7 +164,7 @@ static int pwm_probe(struct platform_device *pdev)
        int ret = 0;
 
        if (IS_ENABLED(CONFIG_OF) && id == NULL)
-               id = pxa_pwm_get_id_dt(&pdev->dev);
+               id = of_device_get_match_data(&pdev->dev);
 
        if (id == NULL)
                return -EINVAL;
index c1a1f2d..03ee18f 100644 (file)
@@ -52,9 +52,9 @@ struct rockchip_pwm_data {
        u32 enable_conf;
 };
 
-static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c)
+static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *chip)
 {
-       return container_of(c, struct rockchip_pwm_chip, chip);
+       return container_of(chip, struct rockchip_pwm_chip, chip);
 }
 
 static int rockchip_pwm_get_state(struct pwm_chip *chip,
index bed8bd6..a56cecb 100644 (file)
@@ -40,7 +40,7 @@
  * struct rz_mtu3_channel_io_map - MTU3 pwm channel map
  *
  * @base_pwm_number: First PWM of a channel
- * @num: number of IOs on the HW channel.
+ * @num_channel_ios: number of IOs on the HW channel.
  */
 struct rz_mtu3_channel_io_map {
        u8 base_pwm_number;
index ae49d67..eabddb7 100644 (file)
@@ -13,6 +13,7 @@
  */
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
@@ -51,9 +52,9 @@ struct pwm_sifive_ddata {
 };
 
 static inline
-struct pwm_sifive_ddata *pwm_sifive_chip_to_ddata(struct pwm_chip *c)
+struct pwm_sifive_ddata *pwm_sifive_chip_to_ddata(struct pwm_chip *chip)
 {
-       return container_of(c, struct pwm_sifive_ddata, chip);
+       return container_of(chip, struct pwm_sifive_ddata, chip);
 }
 
 static int pwm_sifive_request(struct pwm_chip *chip, struct pwm_device *pwm)
index e64900a..9e42e3a 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/pwm.h>
 #include <linux/regmap.h>
 
        regmap_write((priv)->regmap, (priv)->offset + (reg), (val))
 
 struct sl28cpld_pwm {
-       struct pwm_chip pwm_chip;
+       struct pwm_chip chip;
        struct regmap *regmap;
        u32 offset;
 };
-#define sl28cpld_pwm_from_chip(_chip) \
-       container_of(_chip, struct sl28cpld_pwm, pwm_chip)
+
+static inline struct sl28cpld_pwm *sl28cpld_pwm_from_chip(struct pwm_chip *chip)
+{
+       return container_of(chip, struct sl28cpld_pwm, chip);
+}
 
 static int sl28cpld_pwm_get_state(struct pwm_chip *chip,
                                  struct pwm_device *pwm,
@@ -228,12 +232,12 @@ static int sl28cpld_pwm_probe(struct platform_device *pdev)
        }
 
        /* Initialize the pwm_chip structure */
-       chip = &priv->pwm_chip;
+       chip = &priv->chip;
        chip->dev = &pdev->dev;
        chip->ops = &sl28cpld_pwm_ops;
        chip->npwm = 1;
 
-       ret = devm_pwmchip_add(&pdev->dev, &priv->pwm_chip);
+       ret = devm_pwmchip_add(&pdev->dev, chip);
        if (ret) {
                dev_err(&pdev->dev, "failed to add PWM chip (%pe)",
                        ERR_PTR(ret));
index d43a6fa..1499c8c 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/math64.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
index 62e397a..3d6be77 100644 (file)
@@ -637,7 +637,7 @@ static int stm32_pwm_probe(struct platform_device *pdev)
        priv->chip.ops = &stm32pwm_ops;
        priv->chip.npwm = stm32_pwm_detect_channels(priv);
 
-       ret = pwmchip_add(&priv->chip);
+       ret = devm_pwmchip_add(dev, &priv->chip);
        if (ret < 0)
                return ret;
 
@@ -646,17 +646,6 @@ static int stm32_pwm_probe(struct platform_device *pdev)
        return 0;
 }
 
-static void stm32_pwm_remove(struct platform_device *pdev)
-{
-       struct stm32_pwm *priv = platform_get_drvdata(pdev);
-       unsigned int i;
-
-       for (i = 0; i < priv->chip.npwm; i++)
-               pwm_disable(&priv->chip.pwms[i]);
-
-       pwmchip_remove(&priv->chip);
-}
-
 static int __maybe_unused stm32_pwm_suspend(struct device *dev)
 {
        struct stm32_pwm *priv = dev_get_drvdata(dev);
@@ -701,7 +690,6 @@ MODULE_DEVICE_TABLE(of, stm32_pwm_of_match);
 
 static struct platform_driver stm32_pwm_driver = {
        .probe  = stm32_pwm_probe,
-       .remove_new = stm32_pwm_remove,
        .driver = {
                .name = "stm32-pwm",
                .of_match_table = stm32_pwm_of_match,
index 5d4a476..e205405 100644 (file)
@@ -61,8 +61,8 @@ static int stmpe_24xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
        return 0;
 }
 
-static void stmpe_24xx_pwm_disable(struct pwm_chip *chip,
-                                  struct pwm_device *pwm)
+static int stmpe_24xx_pwm_disable(struct pwm_chip *chip,
+                                 struct pwm_device *pwm)
 {
        struct stmpe_pwm *stmpe_pwm = to_stmpe_pwm(chip);
        u8 value;
@@ -72,17 +72,16 @@ static void stmpe_24xx_pwm_disable(struct pwm_chip *chip,
        if (ret < 0) {
                dev_err(chip->dev, "error reading PWM#%u control\n",
                        pwm->hwpwm);
-               return;
+               return ret;
        }
 
        value = ret & ~BIT(pwm->hwpwm);
 
        ret = stmpe_reg_write(stmpe_pwm->stmpe, STMPE24XX_PWMCS, value);
-       if (ret) {
+       if (ret)
                dev_err(chip->dev, "error writing PWM#%u control\n",
                        pwm->hwpwm);
-               return;
-       }
+       return ret;
 }
 
 /* STMPE 24xx PWM instructions */
@@ -111,7 +110,9 @@ static int stmpe_24xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 
        /* Make sure we are disabled */
        if (pwm_is_enabled(pwm)) {
-               stmpe_24xx_pwm_disable(chip, pwm);
+               ret = stmpe_24xx_pwm_disable(chip, pwm);
+               if (ret)
+                       return ret;
        } else {
                /* Connect the PWM to the pin */
                pin = pwm->hwpwm;
@@ -269,7 +270,7 @@ static int stmpe_24xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
        if (!state->enabled) {
                if (pwm->state.enabled)
-                       stmpe_24xx_pwm_disable(chip, pwm);
+                       return stmpe_24xx_pwm_disable(chip, pwm);
 
                return 0;
        }
index a8790a8..c84fcf1 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/reset.h>
index d6ebe9f..7705c7b 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
index 5810abf..a169a34 100644 (file)
@@ -41,7 +41,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pm_opp.h>
 #include <linux/pwm.h>
 #include <linux/platform_device.h>
index 1094499..8c94b26 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 /* ECAP registers and bits definitions */
 #define CAP1                   0x08
index bb3959a..ecbfd7e 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 /* EHRPWM registers and bits definitions */
 
index e3fb79b..7f7591a 100644 (file)
@@ -21,7 +21,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 
index d2c48fd..6d46db5 100644 (file)
@@ -6,6 +6,7 @@
  * Copyright (C) 2010 Alexey Charkov <alchark@gmail.com>
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 
 #include <asm/div64.h>
 
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
-
 /*
  * SoC architecture allocates register space for 4 PWMs but only
  * 2 are currently implemented.
index d022184..9b7c3d7 100644 (file)
@@ -119,7 +119,7 @@ static struct regulator_desc tps6287x_reg = {
        .ramp_mask = TPS6287X_CTRL1_VRAMP,
        .ramp_delay_table = tps6287x_ramp_table,
        .n_ramp_values = ARRAY_SIZE(tps6287x_ramp_table),
-       .n_voltages = 256,
+       .n_voltages = 256 * ARRAY_SIZE(tps6287x_voltage_ranges),
        .linear_ranges = tps6287x_voltage_ranges,
        .n_linear_ranges = ARRAY_SIZE(tps6287x_voltage_ranges),
        .linear_range_selectors_bitfield = tps6287x_voltage_range_sel,
index 25ef102..b7f0c87 100644 (file)
@@ -384,21 +384,19 @@ static int tps6594_request_reg_irqs(struct platform_device *pdev,
                if (irq < 0)
                        return -EINVAL;
 
-               irq_data[*irq_idx + j].dev = tps->dev;
-               irq_data[*irq_idx + j].type = irq_type;
-               irq_data[*irq_idx + j].rdev = rdev;
+               irq_data[*irq_idx].dev = tps->dev;
+               irq_data[*irq_idx].type = irq_type;
+               irq_data[*irq_idx].rdev = rdev;
 
                error = devm_request_threaded_irq(tps->dev, irq, NULL,
-                                                 tps6594_regulator_irq_handler,
-                                                 IRQF_ONESHOT,
-                                                 irq_type->irq_name,
-                                                 &irq_data[*irq_idx]);
-               (*irq_idx)++;
+                                                 tps6594_regulator_irq_handler, IRQF_ONESHOT,
+                                                 irq_type->irq_name, &irq_data[*irq_idx]);
                if (error) {
                        dev_err(tps->dev, "tps6594 failed to request %s IRQ %d: %d\n",
                                irq_type->irq_name, irq, error);
                        return error;
                }
+               (*irq_idx)++;
        }
        return 0;
 }
@@ -420,8 +418,8 @@ static int tps6594_regulator_probe(struct platform_device *pdev)
        int error, i, irq, multi, delta;
        int irq_idx = 0;
        int buck_idx = 0;
-       int ext_reg_irq_nb = 2;
-
+       size_t ext_reg_irq_nb = 2;
+       size_t reg_irq_nb;
        enum {
                MULTI_BUCK12,
                MULTI_BUCK123,
@@ -484,15 +482,16 @@ static int tps6594_regulator_probe(struct platform_device *pdev)
                }
        }
 
-       if (tps->chip_id == LP8764)
+       if (tps->chip_id == LP8764) {
                /* There is only 4 buck on LP8764 */
                buck_configured[4] = 1;
+               reg_irq_nb = size_mul(REGS_INT_NB, (BUCK_NB - 1));
+       } else {
+               reg_irq_nb = size_mul(REGS_INT_NB, (size_add(BUCK_NB, LDO_NB)));
+       }
 
-       irq_data = devm_kmalloc_array(tps->dev,
-                               REGS_INT_NB * sizeof(struct tps6594_regulator_irq_data),
-                               ARRAY_SIZE(tps6594_bucks_irq_types) +
-                               ARRAY_SIZE(tps6594_ldos_irq_types),
-                               GFP_KERNEL);
+       irq_data = devm_kmalloc_array(tps->dev, reg_irq_nb,
+                                     sizeof(struct tps6594_regulator_irq_data), GFP_KERNEL);
        if (!irq_data)
                return -ENOMEM;
 
index 05f4b2d..d750243 100644 (file)
@@ -904,9 +904,9 @@ config RTC_DRV_PCF2127
        select REGMAP_SPI if SPI_MASTER
        select WATCHDOG_CORE if WATCHDOG
        help
-         If you say yes here you get support for the NXP PCF2127/29 RTC
+         If you say yes here you get support for the NXP PCF2127/29/31 RTC
          chips with integrated quartz crystal for industrial applications.
-         Both chips also have watchdog timer and tamper switch detection
+         These chips also have watchdog timer and tamper switch detection
          features.
 
          PCF2127 has an additional feature of 512 bytes battery backed
@@ -1196,6 +1196,7 @@ config RTC_DRV_MSM6242
 config RTC_DRV_BQ4802
        tristate "TI BQ4802"
        depends on HAS_IOMEM && HAS_IOPORT
+       depends on SPARC || COMPILE_TEST
        help
          If you say Y here you will get support for the TI
          BQ4802 RTC chip.
index 499d891..1b63111 100644 (file)
@@ -376,7 +376,7 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
        err = rtc_valid_tm(&alarm->time);
 
 done:
-       if (err)
+       if (err && alarm->enabled)
                dev_warn(&rtc->dev, "invalid alarm value: %ptR\n",
                         &alarm->time);
 
index e08d318..fde2b80 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/i2c.h>
 #include <linux/kstrtox.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/rtc.h>
 #include <linux/watchdog.h>
 
index b4139c2..569c105 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
@@ -474,7 +473,6 @@ static const struct armada38x_rtc_data armada8k_data = {
        .alarm = ALARM2,
 };
 
-#ifdef CONFIG_OF
 static const struct of_device_id armada38x_rtc_of_match_table[] = {
        {
                .compatible = "marvell,armada-380-rtc",
@@ -487,7 +485,6 @@ static const struct of_device_id armada38x_rtc_of_match_table[] = {
        {}
 };
 MODULE_DEVICE_TABLE(of, armada38x_rtc_of_match_table);
-#endif
 
 static __init int armada38x_rtc_probe(struct platform_device *pdev)
 {
@@ -577,7 +574,7 @@ static struct platform_driver armada38x_rtc_driver = {
        .driver         = {
                .name   = "armada38x-rtc",
                .pm     = &armada38x_rtc_pm_ops,
-               .of_match_table = of_match_ptr(armada38x_rtc_of_match_table),
+               .of_match_table = armada38x_rtc_of_match_table,
        },
 };
 
index a93352e..880b015 100644 (file)
@@ -118,7 +118,7 @@ MODULE_DEVICE_TABLE(of, aspeed_rtc_match);
 static struct platform_driver aspeed_rtc_driver = {
        .driver = {
                .name = "aspeed-rtc",
-               .of_match_table = of_match_ptr(aspeed_rtc_match),
+               .of_match_table = aspeed_rtc_match,
        },
 };
 
index e9d1723..add4f71 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
@@ -642,7 +641,7 @@ static struct platform_driver at91_rtc_driver = {
        .driver         = {
                .name   = "at91_rtc",
                .pm     = &at91_rtc_pm_ops,
-               .of_match_table = of_match_ptr(at91_rtc_dt_ids),
+               .of_match_table = at91_rtc_dt_ids,
        },
 };
 
index 610f27d..f93bee9 100644 (file)
@@ -534,7 +534,7 @@ static struct platform_driver at91_rtc_driver = {
        .driver         = {
                .name   = "rtc-at91sam9",
                .pm     = &at91_rtc_pm_ops,
-               .of_match_table = of_match_ptr(at91_rtc_dt_ids),
+               .of_match_table = at91_rtc_dt_ids,
        },
 };
 
index c9416fe..228fb2d 100644 (file)
@@ -913,6 +913,10 @@ static inline void cmos_check_acpi_rtc_status(struct device *dev,
 #define        INITSECTION     __init
 #endif
 
+#define SECS_PER_DAY   (24 * 60 * 60)
+#define SECS_PER_MONTH (28 * SECS_PER_DAY)
+#define SECS_PER_YEAR  (365 * SECS_PER_DAY)
+
 static int INITSECTION
 cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 {
@@ -1019,6 +1023,13 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                goto cleanup0;
        }
 
+       if (cmos_rtc.mon_alrm)
+               cmos_rtc.rtc->alarm_offset_max = SECS_PER_YEAR - 1;
+       else if (cmos_rtc.day_alrm)
+               cmos_rtc.rtc->alarm_offset_max = SECS_PER_MONTH - 1;
+       else
+               cmos_rtc.rtc->alarm_offset_max = SECS_PER_DAY - 1;
+
        rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
 
        if (!mc146818_does_rtc_work()) {
index 998ab86..0cd397c 100644 (file)
@@ -182,21 +182,15 @@ static int cros_ec_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
        ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, alarm_offset);
        if (ret < 0) {
-               if (ret == -EINVAL && alarm_offset >= SECS_PER_DAY) {
-                       /*
-                        * RTC chips on some older Chromebooks can only handle
-                        * alarms up to 24h in the future. Try to set an alarm
-                        * below that limit to avoid suspend failures.
-                        */
-                       ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM,
-                                             SECS_PER_DAY - 1);
-               }
-
-               if (ret < 0) {
-                       dev_err(dev, "error setting alarm in %u seconds: %d\n",
-                               alarm_offset, ret);
-                       return ret;
-               }
+               dev_err(dev, "error setting alarm in %u seconds: %d\n",
+                       alarm_offset, ret);
+               /*
+                * The EC code returns -EINVAL if the alarm time is too
+                * far in the future. Convert it to the expected error code.
+                */
+               if (ret == -EINVAL)
+                       ret = -ERANGE;
+               return ret;
        }
 
        return 0;
@@ -355,6 +349,20 @@ static int cros_ec_rtc_probe(struct platform_device *pdev)
        cros_ec_rtc->rtc->ops = &cros_ec_rtc_ops;
        cros_ec_rtc->rtc->range_max = U32_MAX;
 
+       /*
+        * The RTC on some older Chromebooks can only handle alarms less than
+        * 24 hours in the future. The only way to find out is to try to set an
+        * alarm further in the future. If that fails, assume that the RTC
+        * connected to the EC can only handle less than 24 hours of alarm
+        * window.
+        */
+       ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, SECS_PER_DAY * 2);
+       if (ret == -EINVAL)
+               cros_ec_rtc->rtc->alarm_offset_max = SECS_PER_DAY - 1;
+
+       (void)cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM,
+                             EC_RTC_ALARM_CLEAR);
+
        ret = devm_rtc_register_device(cros_ec_rtc->rtc);
        if (ret)
                return ret;
index ee2efb4..2f5d606 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -496,6 +497,12 @@ static int da9063_rtc_probe(struct platform_device *pdev)
                dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n",
                        irq_alarm, ret);
 
+       ret = dev_pm_set_wake_irq(&pdev->dev, irq_alarm);
+       if (ret)
+               dev_warn(&pdev->dev,
+                        "Failed to set IRQ %d as a wake IRQ: %d\n",
+                        irq_alarm, ret);
+
        device_init_wakeup(&pdev->dev, true);
 
        return devm_rtc_register_device(rtc->rtc_dev);
index ed93604..d4de401 100644 (file)
@@ -336,8 +336,8 @@ static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
        /* make sure alarm fires within the next 24 hours */
        if (later <= now)
                return -EINVAL;
-       if ((later - now) > 24 * 60 * 60)
-               return -EDOM;
+       if ((later - now) > ds1305->rtc->alarm_offset_max)
+               return -ERANGE;
 
        /* disable alarm if needed */
        if (ds1305->ctrl[0] & DS1305_AEI0) {
@@ -691,6 +691,7 @@ static int ds1305_probe(struct spi_device *spi)
        ds1305->rtc->ops = &ds1305_ops;
        ds1305->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        ds1305->rtc->range_max = RTC_TIMESTAMP_END_2099;
+       ds1305->rtc->alarm_offset_max = 24 * 60 * 60;
 
        ds1305_nvmem_cfg.priv = ds1305;
        status = devm_rtc_register_device(ds1305->rtc);
index cb5acec..506b7d1 100644 (file)
@@ -1744,7 +1744,7 @@ static int ds1307_probe(struct i2c_client *client)
 
        match = device_get_match_data(&client->dev);
        if (match) {
-               ds1307->type = (enum ds_type)match;
+               ds1307->type = (uintptr_t)match;
                chip = &chips[ds1307->type];
        } else if (id) {
                chip = &chips[id->driver_data];
index a5026b0..6ae8b9a 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/jiffies.h>
 #include <linux/rtc.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/module.h>
index 0480f59..3231fd9 100644 (file)
@@ -7,9 +7,8 @@
 #include <linux/rtc.h>
 #include <linux/types.h>
 #include <linux/bcd.h>
-#include <linux/platform_data/rtc-ds2404.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 
 #include <linux/io.h>
 #define DS2404_CLK     1
 #define DS2404_DQ      2
 
-struct ds2404_gpio {
-       const char *name;
-       unsigned int gpio;
-};
-
 struct ds2404 {
-       struct ds2404_gpio *gpio;
+       struct device *dev;
+       struct gpio_desc *rst_gpiod;
+       struct gpio_desc *clk_gpiod;
+       struct gpio_desc *dq_gpiod;
        struct rtc_device *rtc;
 };
 
-static struct ds2404_gpio ds2404_gpio[] = {
-       { "RTC RST", 0 },
-       { "RTC CLK", 0 },
-       { "RTC DQ", 0 },
-};
-
-static int ds2404_gpio_map(struct ds2404 *chip, struct platform_device *pdev,
-                         struct ds2404_platform_data *pdata)
+static int ds2404_gpio_map(struct ds2404 *chip, struct platform_device *pdev)
 {
-       int i, err;
-
-       ds2404_gpio[DS2404_RST].gpio = pdata->gpio_rst;
-       ds2404_gpio[DS2404_CLK].gpio = pdata->gpio_clk;
-       ds2404_gpio[DS2404_DQ].gpio = pdata->gpio_dq;
-
-       for (i = 0; i < ARRAY_SIZE(ds2404_gpio); i++) {
-               err = gpio_request(ds2404_gpio[i].gpio, ds2404_gpio[i].name);
-               if (err) {
-                       dev_err(&pdev->dev, "error mapping gpio %s: %d\n",
-                               ds2404_gpio[i].name, err);
-                       goto err_request;
-               }
-               if (i != DS2404_DQ)
-                       gpio_direction_output(ds2404_gpio[i].gpio, 1);
-       }
+       struct device *dev = &pdev->dev;
 
-       chip->gpio = ds2404_gpio;
-       return 0;
+       /* This will de-assert RESET, declare this GPIO as GPIOD_ACTIVE_LOW */
+       chip->rst_gpiod = devm_gpiod_get(dev, "rst", GPIOD_OUT_LOW);
+       if (IS_ERR(chip->rst_gpiod))
+               return PTR_ERR(chip->rst_gpiod);
 
-err_request:
-       while (--i >= 0)
-               gpio_free(ds2404_gpio[i].gpio);
-       return err;
-}
+       chip->clk_gpiod = devm_gpiod_get(dev, "clk", GPIOD_OUT_HIGH);
+       if (IS_ERR(chip->clk_gpiod))
+               return PTR_ERR(chip->clk_gpiod);
 
-static void ds2404_gpio_unmap(void *data)
-{
-       int i;
+       chip->dq_gpiod = devm_gpiod_get(dev, "dq", GPIOD_ASIS);
+       if (IS_ERR(chip->dq_gpiod))
+               return PTR_ERR(chip->dq_gpiod);
 
-       for (i = 0; i < ARRAY_SIZE(ds2404_gpio); i++)
-               gpio_free(ds2404_gpio[i].gpio);
+       return 0;
 }
 
-static void ds2404_reset(struct device *dev)
+static void ds2404_reset(struct ds2404 *chip)
 {
-       gpio_set_value(ds2404_gpio[DS2404_RST].gpio, 0);
+       gpiod_set_value(chip->rst_gpiod, 1);
        udelay(1000);
-       gpio_set_value(ds2404_gpio[DS2404_RST].gpio, 1);
-       gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0);
-       gpio_direction_output(ds2404_gpio[DS2404_DQ].gpio, 0);
+       gpiod_set_value(chip->rst_gpiod, 0);
+       gpiod_set_value(chip->clk_gpiod, 0);
+       gpiod_direction_output(chip->dq_gpiod, 0);
        udelay(10);
 }
 
-static void ds2404_write_byte(struct device *dev, u8 byte)
+static void ds2404_write_byte(struct ds2404 *chip, u8 byte)
 {
        int i;
 
-       gpio_direction_output(ds2404_gpio[DS2404_DQ].gpio, 1);
+       gpiod_direction_output(chip->dq_gpiod, 1);
        for (i = 0; i < 8; i++) {
-               gpio_set_value(ds2404_gpio[DS2404_DQ].gpio, byte & (1 << i));
+               gpiod_set_value(chip->dq_gpiod, byte & (1 << i));
                udelay(10);
-               gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 1);
+               gpiod_set_value(chip->clk_gpiod, 1);
                udelay(10);
-               gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0);
+               gpiod_set_value(chip->clk_gpiod, 0);
                udelay(10);
        }
 }
 
-static u8 ds2404_read_byte(struct device *dev)
+static u8 ds2404_read_byte(struct ds2404 *chip)
 {
        int i;
        u8 ret = 0;
 
-       gpio_direction_input(ds2404_gpio[DS2404_DQ].gpio);
+       gpiod_direction_input(chip->dq_gpiod);
 
        for (i = 0; i < 8; i++) {
-               gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0);
+               gpiod_set_value(chip->clk_gpiod, 0);
                udelay(10);
-               if (gpio_get_value(ds2404_gpio[DS2404_DQ].gpio))
+               if (gpiod_get_value(chip->dq_gpiod))
                        ret |= 1 << i;
-               gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 1);
+               gpiod_set_value(chip->clk_gpiod, 1);
                udelay(10);
        }
        return ret;
 }
 
-static void ds2404_read_memory(struct device *dev, u16 offset,
+static void ds2404_read_memory(struct ds2404 *chip, u16 offset,
                               int length, u8 *out)
 {
-       ds2404_reset(dev);
-       ds2404_write_byte(dev, DS2404_READ_MEMORY_CMD);
-       ds2404_write_byte(dev, offset & 0xff);
-       ds2404_write_byte(dev, (offset >> 8) & 0xff);
+       ds2404_reset(chip);
+       ds2404_write_byte(chip, DS2404_READ_MEMORY_CMD);
+       ds2404_write_byte(chip, offset & 0xff);
+       ds2404_write_byte(chip, (offset >> 8) & 0xff);
        while (length--)
-               *out++ = ds2404_read_byte(dev);
+               *out++ = ds2404_read_byte(chip);
 }
 
-static void ds2404_write_memory(struct device *dev, u16 offset,
+static void ds2404_write_memory(struct ds2404 *chip, u16 offset,
                                int length, u8 *out)
 {
        int i;
        u8 ta01, ta02, es;
 
-       ds2404_reset(dev);
-       ds2404_write_byte(dev, DS2404_WRITE_SCRATCHPAD_CMD);
-       ds2404_write_byte(dev, offset & 0xff);
-       ds2404_write_byte(dev, (offset >> 8) & 0xff);
+       ds2404_reset(chip);
+       ds2404_write_byte(chip, DS2404_WRITE_SCRATCHPAD_CMD);
+       ds2404_write_byte(chip, offset & 0xff);
+       ds2404_write_byte(chip, (offset >> 8) & 0xff);
 
        for (i = 0; i < length; i++)
-               ds2404_write_byte(dev, out[i]);
+               ds2404_write_byte(chip, out[i]);
 
-       ds2404_reset(dev);
-       ds2404_write_byte(dev, DS2404_READ_SCRATCHPAD_CMD);
+       ds2404_reset(chip);
+       ds2404_write_byte(chip, DS2404_READ_SCRATCHPAD_CMD);
 
-       ta01 = ds2404_read_byte(dev);
-       ta02 = ds2404_read_byte(dev);
-       es = ds2404_read_byte(dev);
+       ta01 = ds2404_read_byte(chip);
+       ta02 = ds2404_read_byte(chip);
+       es = ds2404_read_byte(chip);
 
        for (i = 0; i < length; i++) {
-               if (out[i] != ds2404_read_byte(dev)) {
-                       dev_err(dev, "read invalid data\n");
+               if (out[i] != ds2404_read_byte(chip)) {
+                       dev_err(chip->dev, "read invalid data\n");
                        return;
                }
        }
 
-       ds2404_reset(dev);
-       ds2404_write_byte(dev, DS2404_COPY_SCRATCHPAD_CMD);
-       ds2404_write_byte(dev, ta01);
-       ds2404_write_byte(dev, ta02);
-       ds2404_write_byte(dev, es);
+       ds2404_reset(chip);
+       ds2404_write_byte(chip, DS2404_COPY_SCRATCHPAD_CMD);
+       ds2404_write_byte(chip, ta01);
+       ds2404_write_byte(chip, ta02);
+       ds2404_write_byte(chip, es);
 
-       gpio_direction_input(ds2404_gpio[DS2404_DQ].gpio);
-       while (gpio_get_value(ds2404_gpio[DS2404_DQ].gpio))
+       while (gpiod_get_value(chip->dq_gpiod))
                ;
 }
 
-static void ds2404_enable_osc(struct device *dev)
+static void ds2404_enable_osc(struct ds2404 *chip)
 {
        u8 in[1] = { 0x10 }; /* enable oscillator */
-       ds2404_write_memory(dev, 0x201, 1, in);
+
+       ds2404_write_memory(chip, 0x201, 1, in);
 }
 
 static int ds2404_read_time(struct device *dev, struct rtc_time *dt)
 {
+       struct ds2404 *chip = dev_get_drvdata(dev);
        unsigned long time = 0;
        __le32 hw_time = 0;
 
-       ds2404_read_memory(dev, 0x203, 4, (u8 *)&hw_time);
+       ds2404_read_memory(chip, 0x203, 4, (u8 *)&hw_time);
        time = le32_to_cpu(hw_time);
 
        rtc_time64_to_tm(time, dt);
@@ -193,8 +168,9 @@ static int ds2404_read_time(struct device *dev, struct rtc_time *dt)
 
 static int ds2404_set_time(struct device *dev, struct rtc_time *dt)
 {
+       struct ds2404 *chip = dev_get_drvdata(dev);
        u32 time = cpu_to_le32(rtc_tm_to_time64(dt));
-       ds2404_write_memory(dev, 0x203, 4, (u8 *)&time);
+       ds2404_write_memory(chip, 0x203, 4, (u8 *)&time);
        return 0;
 }
 
@@ -205,7 +181,6 @@ static const struct rtc_class_ops ds2404_rtc_ops = {
 
 static int rtc_probe(struct platform_device *pdev)
 {
-       struct ds2404_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct ds2404 *chip;
        int retval = -EBUSY;
 
@@ -213,22 +188,16 @@ static int rtc_probe(struct platform_device *pdev)
        if (!chip)
                return -ENOMEM;
 
+       chip->dev = &pdev->dev;
+
        chip->rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(chip->rtc))
                return PTR_ERR(chip->rtc);
 
-       retval = ds2404_gpio_map(chip, pdev, pdata);
+       retval = ds2404_gpio_map(chip, pdev);
        if (retval)
                return retval;
 
-       retval = devm_add_action_or_reset(&pdev->dev, ds2404_gpio_unmap, chip);
-       if (retval)
-               return retval;
-
-       dev_info(&pdev->dev, "using GPIOs RST:%d, CLK:%d, DQ:%d\n",
-                chip->gpio[DS2404_RST].gpio, chip->gpio[DS2404_CLK].gpio,
-                chip->gpio[DS2404_DQ].gpio);
-
        platform_set_drvdata(pdev, chip);
 
        chip->rtc->ops = &ds2404_rtc_ops;
@@ -238,7 +207,7 @@ static int rtc_probe(struct platform_device *pdev)
        if (retval)
                return retval;
 
-       ds2404_enable_osc(&pdev->dev);
+       ds2404_enable_osc(chip);
        return 0;
 }
 
index 3d7c407..a72c4ad 100644 (file)
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
 #include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/fsl/ftm.h>
 #include <linux/rtc.h>
index a613257..4eef7af 100644 (file)
@@ -9,6 +9,8 @@
  */
 
 #include <linux/bcd.h>
+#include <linux/bitfield.h>
+#include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/hwmon.h>
 #include <linux/i2c.h>
@@ -31,6 +33,8 @@
 #define ISL12022_REG_SR                0x07
 #define ISL12022_REG_INT       0x08
 
+#define ISL12022_REG_PWR_VBAT  0x0a
+
 #define ISL12022_REG_BETA      0x0d
 #define ISL12022_REG_TEMP_L    0x28
 
 #define ISL12022_SR_LBAT75     (1 << 1)
 
 #define ISL12022_INT_WRTC      (1 << 6)
+#define ISL12022_INT_FO_MASK   GENMASK(3, 0)
+#define ISL12022_INT_FO_OFF    0x0
+#define ISL12022_INT_FO_32K    0x1
+
+#define ISL12022_REG_VB85_MASK GENMASK(5, 3)
+#define ISL12022_REG_VB75_MASK GENMASK(2, 0)
 
 #define ISL12022_BETA_TSE      (1 << 7)
 
@@ -141,12 +151,6 @@ static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
        if (ret)
                return ret;
 
-       if (buf[ISL12022_REG_SR] & (ISL12022_SR_LBAT85 | ISL12022_SR_LBAT75)) {
-               dev_warn(dev,
-                        "voltage dropped below %u%%, date and time is not reliable.\n",
-                        buf[ISL12022_REG_SR] & ISL12022_SR_LBAT85 ? 85 : 75);
-       }
-
        dev_dbg(dev,
                "raw data is sec=%02x, min=%02x, hr=%02x, mday=%02x, mon=%02x, year=%02x, wday=%02x, sr=%02x, int=%02x",
                buf[ISL12022_REG_SC],
@@ -204,7 +208,34 @@ static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
        return regmap_bulk_write(regmap, ISL12022_REG_SC, buf, sizeof(buf));
 }
 
+static int isl12022_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+       struct regmap *regmap = dev_get_drvdata(dev);
+       u32 user, val;
+       int ret;
+
+       switch (cmd) {
+       case RTC_VL_READ:
+               ret = regmap_read(regmap, ISL12022_REG_SR, &val);
+               if (ret)
+                       return ret;
+
+               user = 0;
+               if (val & ISL12022_SR_LBAT85)
+                       user |= RTC_VL_BACKUP_LOW;
+
+               if (val & ISL12022_SR_LBAT75)
+                       user |= RTC_VL_BACKUP_EMPTY;
+
+               return put_user(user, (u32 __user *)arg);
+
+       default:
+               return -ENOIOCTLCMD;
+       }
+}
+
 static const struct rtc_class_ops isl12022_rtc_ops = {
+       .ioctl          = isl12022_rtc_ioctl,
        .read_time      = isl12022_rtc_read_time,
        .set_time       = isl12022_rtc_set_time,
 };
@@ -215,10 +246,88 @@ static const struct regmap_config regmap_config = {
        .use_single_write = true,
 };
 
+static int isl12022_register_clock(struct device *dev)
+{
+       struct regmap *regmap = dev_get_drvdata(dev);
+       struct clk_hw *hw;
+       int ret;
+
+       if (!device_property_present(dev, "#clock-cells")) {
+               /*
+                * Disabling the F_OUT pin reduces the power
+                * consumption in battery mode by ~25%.
+                */
+               regmap_update_bits(regmap, ISL12022_REG_INT, ISL12022_INT_FO_MASK,
+                                  ISL12022_INT_FO_OFF);
+
+               return 0;
+       }
+
+       if (!IS_ENABLED(CONFIG_COMMON_CLK))
+               return 0;
+
+       /*
+        * For now, only support a fixed clock of 32768Hz (the reset default).
+        */
+       ret = regmap_update_bits(regmap, ISL12022_REG_INT,
+                                ISL12022_INT_FO_MASK, ISL12022_INT_FO_32K);
+       if (ret)
+               return ret;
+
+       hw = devm_clk_hw_register_fixed_rate(dev, "isl12022", NULL, 0, 32768);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+
+       return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw);
+}
+
+static const u32 trip_levels[2][7] = {
+       { 2125000, 2295000, 2550000, 2805000, 3060000, 4250000, 4675000 },
+       { 1875000, 2025000, 2250000, 2475000, 2700000, 3750000, 4125000 },
+};
+
+static void isl12022_set_trip_levels(struct device *dev)
+{
+       struct regmap *regmap = dev_get_drvdata(dev);
+       u32 levels[2] = {0, 0};
+       int ret, i, j, x[2];
+       u8 val, mask;
+
+       device_property_read_u32_array(dev, "isil,battery-trip-levels-microvolt",
+                                      levels, 2);
+
+       for (i = 0; i < 2; i++) {
+               for (j = 0; j < ARRAY_SIZE(trip_levels[i]) - 1; j++) {
+                       if (levels[i] <= trip_levels[i][j])
+                               break;
+               }
+               x[i] = j;
+       }
+
+       val = FIELD_PREP(ISL12022_REG_VB85_MASK, x[0]) |
+               FIELD_PREP(ISL12022_REG_VB75_MASK, x[1]);
+       mask = ISL12022_REG_VB85_MASK | ISL12022_REG_VB75_MASK;
+
+       ret = regmap_update_bits(regmap, ISL12022_REG_PWR_VBAT, mask, val);
+       if (ret)
+               dev_warn(dev, "unable to set battery alarm levels: %d\n", ret);
+
+       /*
+        * Force a write of the TSE bit in the BETA register, in order
+        * to trigger an update of the LBAT75 and LBAT85 bits in the
+        * status register. In battery backup mode, those bits have
+        * another meaning, so without this, they may contain stale
+        * values for up to a minute after power-on.
+        */
+       regmap_write_bits(regmap, ISL12022_REG_BETA,
+                         ISL12022_BETA_TSE, ISL12022_BETA_TSE);
+}
+
 static int isl12022_probe(struct i2c_client *client)
 {
        struct rtc_device *rtc;
        struct regmap *regmap;
+       int ret;
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
                return -ENODEV;
@@ -231,6 +340,11 @@ static int isl12022_probe(struct i2c_client *client)
 
        dev_set_drvdata(&client->dev, regmap);
 
+       ret = isl12022_register_clock(&client->dev);
+       if (ret)
+               return ret;
+
+       isl12022_set_trip_levels(&client->dev);
        isl12022_hwmon_register(&client->dev);
 
        rtc = devm_rtc_allocate_device(&client->dev);
index 5abff5d..2aabb91 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/mutex.h>
 #include <linux/nvmem-provider.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
 
@@ -429,7 +428,7 @@ static void isl12026_force_power_modes(struct i2c_client *client)
        }
 }
 
-static int isl12026_probe_new(struct i2c_client *client)
+static int isl12026_probe(struct i2c_client *client)
 {
        struct isl12026 *priv;
        int ret;
@@ -490,7 +489,7 @@ static struct i2c_driver isl12026_driver = {
                .name   = "rtc-isl12026",
                .of_match_table = isl12026_dt_match,
        },
-       .probe          = isl12026_probe_new,
+       .probe          = isl12026_probe,
        .remove         = isl12026_remove,
 };
 
index b0712b4..e50c23e 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/clk.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/rtc.h>
 
@@ -188,7 +188,7 @@ isl1208_i2c_validate_client(struct i2c_client *client)
 static int isl1208_set_xtoscb(struct i2c_client *client, int sr, int xtosb_val)
 {
        /* Do nothing if bit is already set to desired value */
-       if ((sr & ISL1208_REG_SR_XTOSCB) == xtosb_val)
+       if (!!(sr & ISL1208_REG_SR_XTOSCB) == xtosb_val)
                return 0;
 
        if (xtosb_val)
@@ -862,17 +862,9 @@ isl1208_probe(struct i2c_client *client)
        i2c_set_clientdata(client, isl1208);
 
        /* Determine which chip we have */
-       if (client->dev.of_node) {
-               isl1208->config = of_device_get_match_data(&client->dev);
-               if (!isl1208->config)
-                       return -ENODEV;
-       } else {
-               const struct i2c_device_id *id = i2c_match_id(isl1208_id, client);
-
-               if (!id)
-                       return -ENODEV;
-               isl1208->config = (struct isl1208_config *)id->driver_data;
-       }
+       isl1208->config = i2c_get_match_data(client);
+       if (!isl1208->config)
+               return -ENODEV;
 
        rc = isl1208_clk_present(client, "xin");
        if (rc < 0)
@@ -952,7 +944,6 @@ isl1208_probe(struct i2c_client *client)
                rc = isl1208_setup_irq(client, client->irq);
                if (rc)
                        return rc;
-
        } else {
                clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, isl1208->rtc->features);
        }
index 36453b0..bafa7d1 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
 #include <linux/property.h>
@@ -349,7 +349,7 @@ static int jz4740_rtc_probe(struct platform_device *pdev)
        if (!rtc)
                return -ENOMEM;
 
-       rtc->type = (enum jz4740_rtc_type)device_get_match_data(dev);
+       rtc->type = (uintptr_t)device_get_match_data(dev);
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
index a4612e5..df17c48 100644 (file)
@@ -9,9 +9,8 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
index 3cc5151..866489a 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
index 481c952..dd4a62e 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
 #include <linux/bcd.h>
@@ -269,9 +270,16 @@ static int m48t86_rtc_probe(struct platform_device *pdev)
        return 0;
 }
 
+static const struct of_device_id m48t86_rtc_of_ids[] = {
+       { .compatible = "st,m48t86" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, m48t86_rtc_of_ids);
+
 static struct platform_driver m48t86_rtc_platform_driver = {
        .driver         = {
                .name   = "rtc-m48t86",
+               .of_match_table = m48t86_rtc_of_ids,
        },
        .probe          = m48t86_rtc_probe,
 };
index 07df43e..28858fc 100644 (file)
 #include <linux/module.h>
 #include <linux/rtc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 
index 1d297af..1617063 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/mfd/mt6397/core.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
index 81857a4..094c649 100644 (file)
@@ -7,9 +7,9 @@
 
 #include <linux/clk.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
index 762cf03..dbb935d 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/pm_wakeirq.h>
 #include <linux/clk.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #define RTC_INPUT_CLK_32768HZ  (0x00 << 5)
 #define RTC_INPUT_CLK_32000HZ  (0x01 << 5)
index a4e3f92..ed4e606 100644 (file)
@@ -538,7 +538,7 @@ MODULE_DEVICE_TABLE(of, nct3018y_of_match);
 static struct i2c_driver nct3018y_driver = {
        .driver         = {
                .name   = "rtc-nct3018y",
-               .of_match_table = of_match_ptr(nct3018y_of_match),
+               .of_match_table = nct3018y_of_match,
        },
        .probe          = nct3018y_probe,
        .id_table       = nct3018y_id,
index 8ae4d78..5b10ab0 100644 (file)
@@ -747,12 +747,12 @@ static int omap_rtc_probe(struct platform_device *pdev)
        }
 
        rtc->irq_timer = platform_get_irq(pdev, 0);
-       if (rtc->irq_timer <= 0)
-               return -ENOENT;
+       if (rtc->irq_timer < 0)
+               return rtc->irq_timer;
 
        rtc->irq_alarm = platform_get_irq(pdev, 1);
-       if (rtc->irq_alarm <= 0)
-               return -ENOENT;
+       if (rtc->irq_alarm < 0)
+               return rtc->irq_alarm;
 
        rtc->clk = devm_clk_get(&pdev->dev, "ext-clk");
        if (!IS_ERR(rtc->clk))
index ee03b04..9c04c4e 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * An I2C and SPI driver for the NXP PCF2127/29 RTC
+ * An I2C and SPI driver for the NXP PCF2127/29/31 RTC
  * Copyright 2013 Til-Technologies
  *
  * Author: Renaud Cerrato <r.cerrato@til-technologies.fr>
@@ -8,9 +8,13 @@
  * Watchdog and tamper functions
  * Author: Bruno Thomsen <bruno.thomsen@gmail.com>
  *
+ * PCF2131 support
+ * Author: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+ *
  * based on the other drivers in this same directory.
  *
- * Datasheet: https://www.nxp.com/docs/en/data-sheet/PCF2127.pdf
+ * Datasheets: https://www.nxp.com/docs/en/data-sheet/PCF2127.pdf
+ *             https://www.nxp.com/docs/en/data-sheet/PCF2131DS.pdf
  */
 
 #include <linux/i2c.h>
@@ -21,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/watchdog.h>
 
@@ -28,6 +33,7 @@
 #define PCF2127_REG_CTRL1              0x00
 #define PCF2127_BIT_CTRL1_POR_OVRD             BIT(3)
 #define PCF2127_BIT_CTRL1_TSF1                 BIT(4)
+#define PCF2127_BIT_CTRL1_STOP                 BIT(5)
 /* Control register 2 */
 #define PCF2127_REG_CTRL2              0x01
 #define PCF2127_BIT_CTRL2_AIE                  BIT(1)
 #define PCF2127_BIT_CTRL3_BF                   BIT(3)
 #define PCF2127_BIT_CTRL3_BTSE                 BIT(4)
 /* Time and date registers */
-#define PCF2127_REG_SC                 0x03
+#define PCF2127_REG_TIME_BASE          0x03
 #define PCF2127_BIT_SC_OSF                     BIT(7)
-#define PCF2127_REG_MN                 0x04
-#define PCF2127_REG_HR                 0x05
-#define PCF2127_REG_DM                 0x06
-#define PCF2127_REG_DW                 0x07
-#define PCF2127_REG_MO                 0x08
-#define PCF2127_REG_YR                 0x09
 /* Alarm registers */
-#define PCF2127_REG_ALARM_SC           0x0A
-#define PCF2127_REG_ALARM_MN           0x0B
-#define PCF2127_REG_ALARM_HR           0x0C
-#define PCF2127_REG_ALARM_DM           0x0D
-#define PCF2127_REG_ALARM_DW           0x0E
+#define PCF2127_REG_ALARM_BASE         0x0A
 #define PCF2127_BIT_ALARM_AE                   BIT(7)
 /* CLKOUT control register */
 #define PCF2127_REG_CLKOUT             0x0f
 #define PCF2127_BIT_WD_CTL_CD0                 BIT(6)
 #define PCF2127_BIT_WD_CTL_CD1                 BIT(7)
 #define PCF2127_REG_WD_VAL             0x11
-/* Tamper timestamp registers */
-#define PCF2127_REG_TS_CTRL            0x12
+/* Tamper timestamp1 registers */
+#define PCF2127_REG_TS1_BASE           0x12
 #define PCF2127_BIT_TS_CTRL_TSOFF              BIT(6)
 #define PCF2127_BIT_TS_CTRL_TSM                        BIT(7)
-#define PCF2127_REG_TS_SC              0x13
-#define PCF2127_REG_TS_MN              0x14
-#define PCF2127_REG_TS_HR              0x15
-#define PCF2127_REG_TS_DM              0x16
-#define PCF2127_REG_TS_MO              0x17
-#define PCF2127_REG_TS_YR              0x18
 /*
  * RAM registers
  * PCF2127 has 512 bytes general-purpose static RAM (SRAM) that is
  * battery backed and can survive a power outage.
- * PCF2129 doesn't have this feature.
+ * PCF2129/31 doesn't have this feature.
  */
 #define PCF2127_REG_RAM_ADDR_MSB       0x1A
 #define PCF2127_REG_RAM_WRT_CMD                0x1C
 
 /* Watchdog timer value constants */
 #define PCF2127_WD_VAL_STOP            0
-#define PCF2127_WD_VAL_MIN             2
-#define PCF2127_WD_VAL_MAX             255
-#define PCF2127_WD_VAL_DEFAULT         60
+/* PCF2127/29 watchdog timer value constants */
+#define PCF2127_WD_CLOCK_HZ_X1000      1000 /* 1Hz */
+#define PCF2127_WD_MIN_HW_HEARTBEAT_MS 500
+/* PCF2131 watchdog timer value constants */
+#define PCF2131_WD_CLOCK_HZ_X1000      250  /* 1/4Hz */
+#define PCF2131_WD_MIN_HW_HEARTBEAT_MS 4000
+
+#define PCF2127_WD_DEFAULT_TIMEOUT_S   60
 
 /* Mask for currently enabled interrupts */
 #define PCF2127_CTRL1_IRQ_MASK (PCF2127_BIT_CTRL1_TSF1)
                PCF2127_BIT_CTRL2_WDTF | \
                PCF2127_BIT_CTRL2_TSF2)
 
+#define PCF2127_MAX_TS_SUPPORTED       4
+
+/* Control register 4 */
+#define PCF2131_REG_CTRL4              0x03
+#define PCF2131_BIT_CTRL4_TSF4                 BIT(4)
+#define PCF2131_BIT_CTRL4_TSF3                 BIT(5)
+#define PCF2131_BIT_CTRL4_TSF2                 BIT(6)
+#define PCF2131_BIT_CTRL4_TSF1                 BIT(7)
+/* Control register 5 */
+#define PCF2131_REG_CTRL5              0x04
+#define PCF2131_BIT_CTRL5_TSIE4                        BIT(4)
+#define PCF2131_BIT_CTRL5_TSIE3                        BIT(5)
+#define PCF2131_BIT_CTRL5_TSIE2                        BIT(6)
+#define PCF2131_BIT_CTRL5_TSIE1                        BIT(7)
+/* Software reset register */
+#define PCF2131_REG_SR_RESET           0x05
+#define PCF2131_SR_RESET_READ_PATTERN  (BIT(2) | BIT(5))
+#define PCF2131_SR_RESET_CPR_CMD       (PCF2131_SR_RESET_READ_PATTERN | BIT(7))
+/* Time and date registers */
+#define PCF2131_REG_TIME_BASE          0x07
+/* Alarm registers */
+#define PCF2131_REG_ALARM_BASE         0x0E
+/* CLKOUT control register */
+#define PCF2131_REG_CLKOUT             0x13
+/* Watchdog registers */
+#define PCF2131_REG_WD_CTL             0x35
+#define PCF2131_REG_WD_VAL             0x36
+/* Tamper timestamp1 registers */
+#define PCF2131_REG_TS1_BASE           0x14
+/* Tamper timestamp2 registers */
+#define PCF2131_REG_TS2_BASE           0x1B
+/* Tamper timestamp3 registers */
+#define PCF2131_REG_TS3_BASE           0x22
+/* Tamper timestamp4 registers */
+#define PCF2131_REG_TS4_BASE           0x29
+/* Interrupt mask registers */
+#define PCF2131_REG_INT_A_MASK1                0x31
+#define PCF2131_REG_INT_A_MASK2                0x32
+#define PCF2131_REG_INT_B_MASK1                0x33
+#define PCF2131_REG_INT_B_MASK2                0x34
+#define PCF2131_BIT_INT_BLIE           BIT(0)
+#define PCF2131_BIT_INT_BIE            BIT(1)
+#define PCF2131_BIT_INT_AIE            BIT(2)
+#define PCF2131_BIT_INT_WD_CD          BIT(3)
+#define PCF2131_BIT_INT_SI             BIT(4)
+#define PCF2131_BIT_INT_MI             BIT(5)
+#define PCF2131_CTRL2_IRQ_MASK ( \
+               PCF2127_BIT_CTRL2_AF | \
+               PCF2127_BIT_CTRL2_WDTF)
+#define PCF2131_CTRL4_IRQ_MASK ( \
+               PCF2131_BIT_CTRL4_TSF4 | \
+               PCF2131_BIT_CTRL4_TSF3 | \
+               PCF2131_BIT_CTRL4_TSF2 | \
+               PCF2131_BIT_CTRL4_TSF1)
+
+enum pcf21xx_type {
+       PCF2127,
+       PCF2129,
+       PCF2131,
+       PCF21XX_LAST_ID
+};
+
+struct pcf21xx_ts_config {
+       u8 reg_base; /* Base register to read timestamp values. */
+
+       /*
+        * If the TS input pin is driven to GND, an interrupt can be generated
+        * (supported by all variants).
+        */
+       u8 gnd_detect_reg; /* Interrupt control register address. */
+       u8 gnd_detect_bit; /* Interrupt bit. */
+
+       /*
+        * If the TS input pin is driven to an intermediate level between GND
+        * and supply, an interrupt can be generated (optional feature depending
+        * on variant).
+        */
+       u8 inter_detect_reg; /* Interrupt control register address. */
+       u8 inter_detect_bit; /* Interrupt bit. */
+
+       u8 ie_reg; /* Interrupt enable control register. */
+       u8 ie_bit; /* Interrupt enable bit. */
+};
+
+struct pcf21xx_config {
+       int type; /* IC variant */
+       int max_register;
+       unsigned int has_nvmem:1;
+       unsigned int has_bit_wd_ctl_cd0:1;
+       unsigned int wd_val_reg_readable:1; /* If watchdog value register can be read. */
+       unsigned int has_int_a_b:1; /* PCF2131 supports two interrupt outputs. */
+       u8 reg_time_base; /* Time/date base register. */
+       u8 regs_alarm_base; /* Alarm function base registers. */
+       u8 reg_wd_ctl; /* Watchdog control register. */
+       u8 reg_wd_val; /* Watchdog value register. */
+       u8 reg_clkout; /* Clkout register. */
+       int wdd_clock_hz_x1000; /* Watchdog clock in Hz multiplicated by 1000 */
+       int wdd_min_hw_heartbeat_ms;
+       unsigned int ts_count;
+       struct pcf21xx_ts_config ts[PCF2127_MAX_TS_SUPPORTED];
+       struct attribute_group attribute_group;
+};
+
 struct pcf2127 {
        struct rtc_device *rtc;
        struct watchdog_device wdd;
        struct regmap *regmap;
-       time64_t ts;
-       bool ts_valid;
+       const struct pcf21xx_config *cfg;
        bool irq_enabled;
+       time64_t ts[PCF2127_MAX_TS_SUPPORTED]; /* Timestamp values. */
+       bool ts_valid[PCF2127_MAX_TS_SUPPORTED];  /* Timestamp valid indication. */
 };
 
 /*
@@ -117,27 +216,22 @@ struct pcf2127 {
 static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
-       unsigned char buf[10];
+       unsigned char buf[7];
        int ret;
 
        /*
         * Avoid reading CTRL2 register as it causes WD_VAL register
         * value to reset to 0 which means watchdog is stopped.
         */
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL3,
-                              (buf + PCF2127_REG_CTRL3),
-                              ARRAY_SIZE(buf) - PCF2127_REG_CTRL3);
+       ret = regmap_bulk_read(pcf2127->regmap, pcf2127->cfg->reg_time_base,
+                              buf, sizeof(buf));
        if (ret) {
                dev_err(dev, "%s: read error\n", __func__);
                return ret;
        }
 
-       if (buf[PCF2127_REG_CTRL3] & PCF2127_BIT_CTRL3_BLF)
-               dev_info(dev,
-                       "low voltage detected, check/replace RTC battery.\n");
-
        /* Clock integrity is not guaranteed when OSF flag is set. */
-       if (buf[PCF2127_REG_SC] & PCF2127_BIT_SC_OSF) {
+       if (buf[0] & PCF2127_BIT_SC_OSF) {
                /*
                 * no need clear the flag here,
                 * it will be cleared once the new date is saved
@@ -148,20 +242,17 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
        }
 
        dev_dbg(dev,
-               "%s: raw data is cr3=%02x, sec=%02x, min=%02x, hr=%02x, "
+               "%s: raw data is sec=%02x, min=%02x, hr=%02x, "
                "mday=%02x, wday=%02x, mon=%02x, year=%02x\n",
-               __func__, buf[PCF2127_REG_CTRL3], buf[PCF2127_REG_SC],
-               buf[PCF2127_REG_MN], buf[PCF2127_REG_HR],
-               buf[PCF2127_REG_DM], buf[PCF2127_REG_DW],
-               buf[PCF2127_REG_MO], buf[PCF2127_REG_YR]);
-
-       tm->tm_sec = bcd2bin(buf[PCF2127_REG_SC] & 0x7F);
-       tm->tm_min = bcd2bin(buf[PCF2127_REG_MN] & 0x7F);
-       tm->tm_hour = bcd2bin(buf[PCF2127_REG_HR] & 0x3F); /* rtc hr 0-23 */
-       tm->tm_mday = bcd2bin(buf[PCF2127_REG_DM] & 0x3F);
-       tm->tm_wday = buf[PCF2127_REG_DW] & 0x07;
-       tm->tm_mon = bcd2bin(buf[PCF2127_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
-       tm->tm_year = bcd2bin(buf[PCF2127_REG_YR]);
+               __func__, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6]);
+
+       tm->tm_sec = bcd2bin(buf[0] & 0x7F);
+       tm->tm_min = bcd2bin(buf[1] & 0x7F);
+       tm->tm_hour = bcd2bin(buf[2] & 0x3F);
+       tm->tm_mday = bcd2bin(buf[3] & 0x3F);
+       tm->tm_wday = buf[4] & 0x07;
+       tm->tm_mon = bcd2bin(buf[5] & 0x1F) - 1;
+       tm->tm_year = bcd2bin(buf[6]);
        tm->tm_year += 100;
 
        dev_dbg(dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
@@ -198,14 +289,45 @@ static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm)
        /* year */
        buf[i++] = bin2bcd(tm->tm_year - 100);
 
-       /* write register's data */
-       err = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_SC, buf, i);
+       /* Write access to time registers:
+        * PCF2127/29: no special action required.
+        * PCF2131:    requires setting the STOP and CPR bits. STOP bit needs to
+        *             be cleared after time registers are updated.
+        */
+       if (pcf2127->cfg->type == PCF2131) {
+               err = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+                                        PCF2127_BIT_CTRL1_STOP,
+                                        PCF2127_BIT_CTRL1_STOP);
+               if (err) {
+                       dev_dbg(dev, "setting STOP bit failed\n");
+                       return err;
+               }
+
+               err = regmap_write(pcf2127->regmap, PCF2131_REG_SR_RESET,
+                                  PCF2131_SR_RESET_CPR_CMD);
+               if (err) {
+                       dev_dbg(dev, "sending CPR cmd failed\n");
+                       return err;
+               }
+       }
+
+       /* write time register's data */
+       err = regmap_bulk_write(pcf2127->regmap, pcf2127->cfg->reg_time_base, buf, i);
        if (err) {
-               dev_err(dev,
-                       "%s: err=%d", __func__, err);
+               dev_dbg(dev, "%s: err=%d", __func__, err);
                return err;
        }
 
+       if (pcf2127->cfg->type == PCF2131) {
+               /* Clear STOP bit (PCF2131 only) after write is completed. */
+               err = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+                                        PCF2127_BIT_CTRL1_STOP, 0);
+               if (err) {
+                       dev_dbg(dev, "clearing STOP bit failed\n");
+                       return err;
+               }
+       }
+
        return 0;
 }
 
@@ -275,9 +397,16 @@ static int pcf2127_nvmem_write(void *priv, unsigned int offset,
 
 static int pcf2127_wdt_ping(struct watchdog_device *wdd)
 {
+       int wd_val; /* Raw counter value written to the watchdog value register. */
        struct pcf2127 *pcf2127 = watchdog_get_drvdata(wdd);
 
-       return regmap_write(pcf2127->regmap, PCF2127_REG_WD_VAL, wdd->timeout);
+       /*
+        * Convert the timeout in seconds into a WATCHDG_TIM_VAL counter value
+        * from the source clock frequency (stored as Hz x 1000).
+        */
+       wd_val = ((wdd->timeout * pcf2127->cfg->wdd_clock_hz_x1000) / 1000) + 1; /* + 1 mirrors the (n - 1) term in pcf2127_watchdog_get_period(). */
+
+       return regmap_write(pcf2127->regmap, pcf2127->cfg->reg_wd_val, wd_val);
 }
 
 /*
@@ -311,7 +440,7 @@ static int pcf2127_wdt_stop(struct watchdog_device *wdd)
 {
        struct pcf2127 *pcf2127 = watchdog_get_drvdata(wdd);
 
-       return regmap_write(pcf2127->regmap, PCF2127_REG_WD_VAL,
+       return regmap_write(pcf2127->regmap, pcf2127->cfg->reg_wd_val,
                            PCF2127_WD_VAL_STOP);
 }
 
@@ -339,9 +468,25 @@ static const struct watchdog_ops pcf2127_watchdog_ops = {
        .set_timeout = pcf2127_wdt_set_timeout,
 };
 
+/*
+ * Compute watchdog period, t, in whole seconds (integer division), from the
+ * WATCHDG_TIM_VAL register value, n, and the clock frequency, f1000, in
+ * Hz x 1000. f1000 is used as a divisor and must therefore be non-zero.
+ * The PCF2127/29 datasheet gives t as:
+ *   t = n / f
+ * The PCF2131 datasheet gives t as:
+ *   t = (n - 1) / f
+ * For both variants, the watchdog is triggered when the WATCHDG_TIM_VAL reaches
+ * the value 1, and not zero. Consequently, the equation from the PCF2131
+ * datasheet seems to be the correct one for both variants.
+ */
+static int pcf2127_watchdog_get_period(int n, int f1000)
+{
+       return (1000 * (n - 1)) / f1000; /* Scale by 1000 first because f1000 is Hz x 1000. */
+}
+
 static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127)
 {
-       u32 wdd_timeout;
        int ret;
 
        if (!IS_ENABLED(CONFIG_WATCHDOG) ||
@@ -351,21 +496,35 @@ static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127)
        pcf2127->wdd.parent = dev;
        pcf2127->wdd.info = &pcf2127_wdt_info;
        pcf2127->wdd.ops = &pcf2127_watchdog_ops;
-       pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN;
-       pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX;
-       pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT;
-       pcf2127->wdd.min_hw_heartbeat_ms = 500;
+
+       pcf2127->wdd.min_timeout =
+               pcf2127_watchdog_get_period(
+                       2, pcf2127->cfg->wdd_clock_hz_x1000);
+       pcf2127->wdd.max_timeout =
+               pcf2127_watchdog_get_period(
+                       255, pcf2127->cfg->wdd_clock_hz_x1000);
+       pcf2127->wdd.timeout = PCF2127_WD_DEFAULT_TIMEOUT_S;
+
+       dev_dbg(dev, "%s clock = %d Hz / 1000\n", __func__,
+               pcf2127->cfg->wdd_clock_hz_x1000);
+
+       pcf2127->wdd.min_hw_heartbeat_ms = pcf2127->cfg->wdd_min_hw_heartbeat_ms;
        pcf2127->wdd.status = WATCHDOG_NOWAYOUT_INIT_STATUS;
 
        watchdog_set_drvdata(&pcf2127->wdd, pcf2127);
 
        /* Test if watchdog timer is started by bootloader */
-       ret = regmap_read(pcf2127->regmap, PCF2127_REG_WD_VAL, &wdd_timeout);
-       if (ret)
-               return ret;
+       if (pcf2127->cfg->wd_val_reg_readable) {
+               u32 wdd_timeout;
 
-       if (wdd_timeout)
-               set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status);
+               ret = regmap_read(pcf2127->regmap, pcf2127->cfg->reg_wd_val,
+                                 &wdd_timeout);
+               if (ret)
+                       return ret;
+
+               if (wdd_timeout)
+                       set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status);
+       }
 
        return devm_watchdog_register_device(dev, &pcf2127->wdd);
 }
@@ -386,8 +545,8 @@ static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        if (ret)
                return ret;
 
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf,
-                              sizeof(buf));
+       ret = regmap_bulk_read(pcf2127->regmap, pcf2127->cfg->regs_alarm_base,
+                              buf, sizeof(buf));
        if (ret)
                return ret;
 
@@ -437,8 +596,8 @@ static int pcf2127_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        buf[3] = bin2bcd(alrm->time.tm_mday);
        buf[4] = PCF2127_BIT_ALARM_AE; /* Do not match on week day */
 
-       ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf,
-                               sizeof(buf));
+       ret = regmap_bulk_write(pcf2127->regmap, pcf2127->cfg->regs_alarm_base,
+                               buf, sizeof(buf));
        if (ret)
                return ret;
 
@@ -446,38 +605,35 @@ static int pcf2127_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 }
 
 /*
- * This function reads ctrl2 register, caller is responsible for calling
- * pcf2127_wdt_active_ping()
+ * This function reads one timestamp function data, caller is responsible for
+ * calling pcf2127_wdt_active_ping()
  */
-static int pcf2127_rtc_ts_read(struct device *dev, time64_t *ts)
+static int pcf2127_rtc_ts_read(struct device *dev, time64_t *ts,
+                              int ts_id)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
        struct rtc_time tm;
        int ret;
-       unsigned char data[25];
+       unsigned char data[7];
 
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL1, data,
-                              sizeof(data));
+       ret = regmap_bulk_read(pcf2127->regmap, pcf2127->cfg->ts[ts_id].reg_base,
+                              data, sizeof(data));
        if (ret) {
                dev_err(dev, "%s: read error ret=%d\n", __func__, ret);
                return ret;
        }
 
        dev_dbg(dev,
-               "%s: raw data is cr1=%02x, cr2=%02x, cr3=%02x, ts_sc=%02x, ts_mn=%02x, ts_hr=%02x, ts_dm=%02x, ts_mo=%02x, ts_yr=%02x\n",
-               __func__, data[PCF2127_REG_CTRL1], data[PCF2127_REG_CTRL2],
-               data[PCF2127_REG_CTRL3], data[PCF2127_REG_TS_SC],
-               data[PCF2127_REG_TS_MN], data[PCF2127_REG_TS_HR],
-               data[PCF2127_REG_TS_DM], data[PCF2127_REG_TS_MO],
-               data[PCF2127_REG_TS_YR]);
-
-       tm.tm_sec = bcd2bin(data[PCF2127_REG_TS_SC] & 0x7F);
-       tm.tm_min = bcd2bin(data[PCF2127_REG_TS_MN] & 0x7F);
-       tm.tm_hour = bcd2bin(data[PCF2127_REG_TS_HR] & 0x3F);
-       tm.tm_mday = bcd2bin(data[PCF2127_REG_TS_DM] & 0x3F);
+               "%s: raw data is ts_sc=%02x, ts_mn=%02x, ts_hr=%02x, ts_dm=%02x, ts_mo=%02x, ts_yr=%02x\n",
+               __func__, data[1], data[2], data[3], data[4], data[5], data[6]);
+
+       tm.tm_sec = bcd2bin(data[1] & 0x7F);
+       tm.tm_min = bcd2bin(data[2] & 0x7F);
+       tm.tm_hour = bcd2bin(data[3] & 0x3F);
+       tm.tm_mday = bcd2bin(data[4] & 0x3F);
        /* TS_MO register (month) value range: 1-12 */
-       tm.tm_mon = bcd2bin(data[PCF2127_REG_TS_MO] & 0x1F) - 1;
-       tm.tm_year = bcd2bin(data[PCF2127_REG_TS_YR]);
+       tm.tm_mon = bcd2bin(data[5] & 0x1F) - 1;
+       tm.tm_year = bcd2bin(data[6]);
        if (tm.tm_year < 70)
                tm.tm_year += 100; /* assume we are in 1970...2069 */
 
@@ -491,47 +647,84 @@ static int pcf2127_rtc_ts_read(struct device *dev, time64_t *ts)
        return 0;
 };
 
-static void pcf2127_rtc_ts_snapshot(struct device *dev)
+static void pcf2127_rtc_ts_snapshot(struct device *dev, int ts_id)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
        int ret;
 
+       if (ts_id >= pcf2127->cfg->ts_count) /* Timestamp ID not provided by this variant. */
+               return;
+
        /* Let userspace read the first timestamp */
-       if (pcf2127->ts_valid)
+       if (pcf2127->ts_valid[ts_id]) /* A cached value is already pending: do not overwrite it. */
                return;
 
-       ret = pcf2127_rtc_ts_read(dev, &pcf2127->ts);
+       ret = pcf2127_rtc_ts_read(dev, &pcf2127->ts[ts_id], ts_id);
        if (!ret)
-               pcf2127->ts_valid = true;
+               pcf2127->ts_valid[ts_id] = true; /* Only mark the cache valid after a successful read. */
 }
 
 static irqreturn_t pcf2127_rtc_irq(int irq, void *dev)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
-       unsigned int ctrl1, ctrl2;
+       unsigned int ctrl2;
        int ret = 0;
 
-       ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1, &ctrl1);
-       if (ret)
-               return IRQ_NONE;
-
        ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2);
        if (ret)
                return IRQ_NONE;
 
-       if (!(ctrl1 & PCF2127_CTRL1_IRQ_MASK || ctrl2 & PCF2127_CTRL2_IRQ_MASK))
-               return IRQ_NONE;
+       if (pcf2127->cfg->ts_count == 1) {
+               /* PCF2127/29 */
+               unsigned int ctrl1;
+
+               ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1, &ctrl1);
+               if (ret)
+                       return IRQ_NONE;
+
+               if (!(ctrl1 & PCF2127_CTRL1_IRQ_MASK || ctrl2 & PCF2127_CTRL2_IRQ_MASK))
+                       return IRQ_NONE;
+
+               if (ctrl1 & PCF2127_BIT_CTRL1_TSF1 || ctrl2 & PCF2127_BIT_CTRL2_TSF2)
+                       pcf2127_rtc_ts_snapshot(dev, 0);
+
+               if (ctrl1 & PCF2127_CTRL1_IRQ_MASK)
+                       regmap_write(pcf2127->regmap, PCF2127_REG_CTRL1,
+                                    ctrl1 & ~PCF2127_CTRL1_IRQ_MASK);
+
+               if (ctrl2 & PCF2127_CTRL2_IRQ_MASK)
+                       regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2,
+                                    ctrl2 & ~PCF2127_CTRL2_IRQ_MASK);
+       } else {
+               /* PCF2131. */
+               unsigned int ctrl4;
+
+               ret = regmap_read(pcf2127->regmap, PCF2131_REG_CTRL4, &ctrl4);
+               if (ret)
+                       return IRQ_NONE;
+
+               if (!(ctrl4 & PCF2131_CTRL4_IRQ_MASK || ctrl2 & PCF2131_CTRL2_IRQ_MASK))
+                       return IRQ_NONE;
 
-       if (ctrl1 & PCF2127_BIT_CTRL1_TSF1 || ctrl2 & PCF2127_BIT_CTRL2_TSF2)
-               pcf2127_rtc_ts_snapshot(dev);
+               if (ctrl4 & PCF2131_CTRL4_IRQ_MASK) {
+                       int i;
+                       int tsf_bit = PCF2131_BIT_CTRL4_TSF1; /* Start at bit 7. */
 
-       if (ctrl1 & PCF2127_CTRL1_IRQ_MASK)
-               regmap_write(pcf2127->regmap, PCF2127_REG_CTRL1,
-                       ctrl1 & ~PCF2127_CTRL1_IRQ_MASK);
+                       for (i = 0; i < pcf2127->cfg->ts_count; i++) {
+                               if (ctrl4 & tsf_bit)
+                                       pcf2127_rtc_ts_snapshot(dev, i);
 
-       if (ctrl2 & PCF2127_CTRL2_IRQ_MASK)
-               regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2,
-                       ctrl2 & ~PCF2127_CTRL2_IRQ_MASK);
+                               tsf_bit = tsf_bit >> 1;
+                       }
+
+                       regmap_write(pcf2127->regmap, PCF2131_REG_CTRL4,
+                                    ctrl4 & ~PCF2131_CTRL4_IRQ_MASK);
+               }
+
+               if (ctrl2 & PCF2131_CTRL2_IRQ_MASK)
+                       regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2,
+                                    ctrl2 & ~PCF2131_CTRL2_IRQ_MASK);
+       }
 
        if (ctrl2 & PCF2127_BIT_CTRL2_AF)
                rtc_update_irq(pcf2127->rtc, 1, RTC_IRQF | RTC_AF);
@@ -552,28 +745,41 @@ static const struct rtc_class_ops pcf2127_rtc_ops = {
 
 /* sysfs interface */
 
-static ssize_t timestamp0_store(struct device *dev,
-                               struct device_attribute *attr,
-                               const char *buf, size_t count)
+static ssize_t timestamp_store(struct device *dev,
+                              struct device_attribute *attr,
+                              const char *buf, size_t count, int ts_id)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev->parent);
        int ret;
 
+       if (ts_id >= pcf2127->cfg->ts_count)
+               return 0;
+
        if (pcf2127->irq_enabled) {
-               pcf2127->ts_valid = false;
+               pcf2127->ts_valid[ts_id] = false;
        } else {
-               ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
-                       PCF2127_BIT_CTRL1_TSF1, 0);
+               /* Always clear GND interrupt bit. */
+               ret = regmap_update_bits(pcf2127->regmap,
+                                        pcf2127->cfg->ts[ts_id].gnd_detect_reg,
+                                        pcf2127->cfg->ts[ts_id].gnd_detect_bit,
+                                        0);
+
                if (ret) {
-                       dev_err(dev, "%s: update ctrl1 ret=%d\n", __func__, ret);
+                       dev_err(dev, "%s: update TS gnd detect ret=%d\n", __func__, ret);
                        return ret;
                }
 
-               ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2,
-                       PCF2127_BIT_CTRL2_TSF2, 0);
-               if (ret) {
-                       dev_err(dev, "%s: update ctrl2 ret=%d\n", __func__, ret);
-                       return ret;
+               if (pcf2127->cfg->ts[ts_id].inter_detect_bit) {
+                       /* Clear intermediate level interrupt bit if supported. */
+                       ret = regmap_update_bits(pcf2127->regmap,
+                                                pcf2127->cfg->ts[ts_id].inter_detect_reg,
+                                                pcf2127->cfg->ts[ts_id].inter_detect_bit,
+                                                0);
+                       if (ret) {
+                               dev_err(dev, "%s: update TS intermediate level detect ret=%d\n",
+                                       __func__, ret);
+                               return ret;
+                       }
                }
 
                ret = pcf2127_wdt_active_ping(&pcf2127->wdd);
@@ -582,34 +788,84 @@ static ssize_t timestamp0_store(struct device *dev,
        }
 
        return count;
+}
+
+static ssize_t timestamp0_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       return timestamp_store(dev, attr, buf, count, 0); /* sysfs "timestamp0" write: timestamp ID 0. */
 };
 
-static ssize_t timestamp0_show(struct device *dev,
-                              struct device_attribute *attr, char *buf)
+static ssize_t timestamp1_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       return timestamp_store(dev, attr, buf, count, 1); /* sysfs "timestamp1" write: timestamp ID 1. */
+};
+
+static ssize_t timestamp2_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       return timestamp_store(dev, attr, buf, count, 2); /* sysfs "timestamp2" write: timestamp ID 2. */
+};
+
+static ssize_t timestamp3_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       return timestamp_store(dev, attr, buf, count, 3); /* sysfs "timestamp3" write: timestamp ID 3. */
+};
+
+static ssize_t timestamp_show(struct device *dev,
+                             struct device_attribute *attr, char *buf,
+                             int ts_id)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev->parent);
-       unsigned int ctrl1, ctrl2;
        int ret;
        time64_t ts;
 
+       if (ts_id >= pcf2127->cfg->ts_count)
+               return 0;
+
        if (pcf2127->irq_enabled) {
-               if (!pcf2127->ts_valid)
+               if (!pcf2127->ts_valid[ts_id])
                        return 0;
-               ts = pcf2127->ts;
+               ts = pcf2127->ts[ts_id];
        } else {
-               ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1, &ctrl1);
-               if (ret)
-                       return 0;
+               u8 valid_low = 0;
+               u8 valid_inter = 0;
+               unsigned int ctrl;
 
-               ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2);
+               /* Check if TS input pin is driven to GND, supported by all
+                * variants.
+                */
+               ret = regmap_read(pcf2127->regmap,
+                                 pcf2127->cfg->ts[ts_id].gnd_detect_reg,
+                                 &ctrl);
                if (ret)
                        return 0;
 
-               if (!(ctrl1 & PCF2127_BIT_CTRL1_TSF1) &&
-                   !(ctrl2 & PCF2127_BIT_CTRL2_TSF2))
+               valid_low = ctrl & pcf2127->cfg->ts[ts_id].gnd_detect_bit;
+
+               if (pcf2127->cfg->ts[ts_id].inter_detect_bit) {
+                       /* Check if TS input pin is driven to intermediate level
+                        * between GND and supply, if supported by variant.
+                        */
+                       ret = regmap_read(pcf2127->regmap,
+                                         pcf2127->cfg->ts[ts_id].inter_detect_reg,
+                                         &ctrl);
+                       if (ret)
+                               return 0;
+
+                       valid_inter = ctrl & pcf2127->cfg->ts[ts_id].inter_detect_bit;
+               }
+
+               if (!valid_low && !valid_inter)
                        return 0;
 
-               ret = pcf2127_rtc_ts_read(dev->parent, &ts);
+               ret = pcf2127_rtc_ts_read(dev->parent, &ts, ts_id);
                if (ret)
                        return 0;
 
@@ -618,21 +874,227 @@ static ssize_t timestamp0_show(struct device *dev,
                        return ret;
        }
        return sprintf(buf, "%llu\n", (unsigned long long)ts);
+}
+
+static ssize_t timestamp0_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       return timestamp_show(dev, attr, buf, 0); /* sysfs "timestamp0" read: timestamp ID 0. */
+};
+
+static ssize_t timestamp1_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       return timestamp_show(dev, attr, buf, 1); /* sysfs "timestamp1" read: timestamp ID 1. */
+};
+
+static ssize_t timestamp2_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       return timestamp_show(dev, attr, buf, 2); /* sysfs "timestamp2" read: timestamp ID 2. */
+};
+
+static ssize_t timestamp3_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       return timestamp_show(dev, attr, buf, 3); /* sysfs "timestamp3" read: timestamp ID 3. */
 };
 
 static DEVICE_ATTR_RW(timestamp0);
+static DEVICE_ATTR_RW(timestamp1);
+static DEVICE_ATTR_RW(timestamp2);
+static DEVICE_ATTR_RW(timestamp3);
 
 static struct attribute *pcf2127_attrs[] = {
        &dev_attr_timestamp0.attr,
        NULL
 };
 
-static const struct attribute_group pcf2127_attr_group = {
-       .attrs  = pcf2127_attrs,
+static struct attribute *pcf2131_attrs[] = { /* sysfs attributes for PCF2131: one file per timestamp ID (0-3). */
+       &dev_attr_timestamp0.attr,
+       &dev_attr_timestamp1.attr,
+       &dev_attr_timestamp2.attr,
+       &dev_attr_timestamp3.attr,
+       NULL
 };
 
+static struct pcf21xx_config pcf21xx_cfg[] = { /* One entry per supported variant, indexed by variant type. */
+       [PCF2127] = { /* Single timestamp function, NVMEM flag set. */
+               .type = PCF2127,
+               .max_register = 0x1d,
+               .has_nvmem = 1,
+               .has_bit_wd_ctl_cd0 = 1,
+               .wd_val_reg_readable = 1,
+               .has_int_a_b = 0,
+               .reg_time_base = PCF2127_REG_TIME_BASE,
+               .regs_alarm_base = PCF2127_REG_ALARM_BASE,
+               .reg_wd_ctl = PCF2127_REG_WD_CTL,
+               .reg_wd_val = PCF2127_REG_WD_VAL,
+               .reg_clkout = PCF2127_REG_CLKOUT,
+               .wdd_clock_hz_x1000 = PCF2127_WD_CLOCK_HZ_X1000,
+               .wdd_min_hw_heartbeat_ms = PCF2127_WD_MIN_HW_HEARTBEAT_MS,
+               .ts_count = 1,
+               .ts[0] = { /* Timestamp flags are split across CTRL1 (GND) and CTRL2 (intermediate level). */
+                       .reg_base  = PCF2127_REG_TS1_BASE,
+                       .gnd_detect_reg = PCF2127_REG_CTRL1,
+                       .gnd_detect_bit = PCF2127_BIT_CTRL1_TSF1,
+                       .inter_detect_reg = PCF2127_REG_CTRL2,
+                       .inter_detect_bit = PCF2127_BIT_CTRL2_TSF2,
+                       .ie_reg    = PCF2127_REG_CTRL2,
+                       .ie_bit    = PCF2127_BIT_CTRL2_TSIE,
+               },
+               .attribute_group = {
+                       .attrs  = pcf2127_attrs,
+               },
+       },
+       [PCF2129] = { /* Same register names as the PCF2127 entry; differs in max_register, has_nvmem and has_bit_wd_ctl_cd0. */
+               .type = PCF2129,
+               .max_register = 0x19,
+               .has_nvmem = 0,
+               .has_bit_wd_ctl_cd0 = 0,
+               .wd_val_reg_readable = 1,
+               .has_int_a_b = 0,
+               .reg_time_base = PCF2127_REG_TIME_BASE,
+               .regs_alarm_base = PCF2127_REG_ALARM_BASE,
+               .reg_wd_ctl = PCF2127_REG_WD_CTL,
+               .reg_wd_val = PCF2127_REG_WD_VAL,
+               .reg_clkout = PCF2127_REG_CLKOUT,
+               .wdd_clock_hz_x1000 = PCF2127_WD_CLOCK_HZ_X1000,
+               .wdd_min_hw_heartbeat_ms = PCF2127_WD_MIN_HW_HEARTBEAT_MS,
+               .ts_count = 1,
+               .ts[0] = {
+                       .reg_base  = PCF2127_REG_TS1_BASE,
+                       .gnd_detect_reg = PCF2127_REG_CTRL1,
+                       .gnd_detect_bit = PCF2127_BIT_CTRL1_TSF1,
+                       .inter_detect_reg = PCF2127_REG_CTRL2,
+                       .inter_detect_bit = PCF2127_BIT_CTRL2_TSF2,
+                       .ie_reg    = PCF2127_REG_CTRL2,
+                       .ie_bit    = PCF2127_BIT_CTRL2_TSIE,
+               },
+               .attribute_group = {
+                       .attrs  = pcf2127_attrs,
+               },
+       },
+       [PCF2131] = { /* Four timestamp functions, own register map, INT A/B outputs. */
+               .type = PCF2131,
+               .max_register = 0x36,
+               .has_nvmem = 0,
+               .has_bit_wd_ctl_cd0 = 0,
+               .wd_val_reg_readable = 0, /* Probe-time "watchdog already running" check is skipped for this variant. */
+               .has_int_a_b = 1,
+               .reg_time_base = PCF2131_REG_TIME_BASE,
+               .regs_alarm_base = PCF2131_REG_ALARM_BASE,
+               .reg_wd_ctl = PCF2131_REG_WD_CTL,
+               .reg_wd_val = PCF2131_REG_WD_VAL,
+               .reg_clkout = PCF2131_REG_CLKOUT,
+               .wdd_clock_hz_x1000 = PCF2131_WD_CLOCK_HZ_X1000,
+               .wdd_min_hw_heartbeat_ms = PCF2131_WD_MIN_HW_HEARTBEAT_MS,
+               .ts_count = 4,
+               .ts[0] = { /* All four timestamp flags live in CTRL4, their interrupt enables in CTRL5. */
+                       .reg_base  = PCF2131_REG_TS1_BASE,
+                       .gnd_detect_reg = PCF2131_REG_CTRL4,
+                       .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF1,
+                       .inter_detect_bit = 0, /* 0 makes timestamp_show()/timestamp_store() skip the intermediate-level flag. */
+                       .ie_reg    = PCF2131_REG_CTRL5,
+                       .ie_bit    = PCF2131_BIT_CTRL5_TSIE1,
+               },
+               .ts[1] = {
+                       .reg_base  = PCF2131_REG_TS2_BASE,
+                       .gnd_detect_reg = PCF2131_REG_CTRL4,
+                       .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF2,
+                       .inter_detect_bit = 0,
+                       .ie_reg    = PCF2131_REG_CTRL5,
+                       .ie_bit    = PCF2131_BIT_CTRL5_TSIE2,
+               },
+               .ts[2] = {
+                       .reg_base  = PCF2131_REG_TS3_BASE,
+                       .gnd_detect_reg = PCF2131_REG_CTRL4,
+                       .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF3,
+                       .inter_detect_bit = 0,
+                       .ie_reg    = PCF2131_REG_CTRL5,
+                       .ie_bit    = PCF2131_BIT_CTRL5_TSIE3,
+               },
+               .ts[3] = {
+                       .reg_base  = PCF2131_REG_TS4_BASE,
+                       .gnd_detect_reg = PCF2131_REG_CTRL4,
+                       .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF4,
+                       .inter_detect_bit = 0,
+                       .ie_reg    = PCF2131_REG_CTRL5,
+                       .ie_bit    = PCF2131_BIT_CTRL5_TSIE4,
+               },
+               .attribute_group = {
+                       .attrs  = pcf2131_attrs,
+               },
+       },
+};
+
+/*
+ * Enable timestamp function ts_id and its interrupt; 0 on success, else a negative error.
+ */
+static int pcf2127_enable_ts(struct device *dev, int ts_id)
+{
+       struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
+       int ret;
+
+       if (ts_id >= pcf2127->cfg->ts_count) { /* Reject IDs this variant does not provide. */
+               dev_err(dev, "%s: invalid tamper detection ID (%d)\n",
+                       __func__, ts_id);
+               return -EINVAL;
+       }
+
+       /* Enable timestamp function: clear TSOFF and set TSM in the register at reg_base. */
+       ret = regmap_update_bits(pcf2127->regmap,
+                                pcf2127->cfg->ts[ts_id].reg_base,
+                                PCF2127_BIT_TS_CTRL_TSOFF |
+                                PCF2127_BIT_TS_CTRL_TSM,
+                                PCF2127_BIT_TS_CTRL_TSM);
+       if (ret) {
+               dev_err(dev, "%s: tamper detection config (ts%d_ctrl) failed\n",
+                       __func__, ts_id);
+               return ret;
+       }
+
+       /*
+        * Enable interrupt generation when TSF timestamp flag is set.
+        * Interrupt signals are open-drain outputs and can be left floating if
+        * unused.
+        */
+       ret = regmap_update_bits(pcf2127->regmap, pcf2127->cfg->ts[ts_id].ie_reg,
+                                pcf2127->cfg->ts[ts_id].ie_bit,
+                                pcf2127->cfg->ts[ts_id].ie_bit);
+       if (ret) {
+               dev_err(dev, "%s: tamper detection TSIE%d config failed\n",
+                       __func__, ts_id);
+               return ret;
+       }
+
+       return ret; /* ret is 0 here: both register updates succeeded. */
+}
+
+/* Route all interrupt sources to INT A pin. Returns 0 on success or the regmap error. */
+static int pcf2127_configure_interrupt_pins(struct device *dev)
+{
+       struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
+       int ret;
+
+       /* Mask bits need to be cleared to enable corresponding
+        * interrupt source, so both INT A mask registers are written with 0.
+        */
+       ret = regmap_write(pcf2127->regmap,
+                          PCF2131_REG_INT_A_MASK1, 0);
+       if (ret)
+               return ret;
+
+       ret = regmap_write(pcf2127->regmap,
+                          PCF2131_REG_INT_A_MASK2, 0);
+       if (ret)
+               return ret;
+
+       return ret; /* ret is 0 here: both writes succeeded. */
+}
+
 static int pcf2127_probe(struct device *dev, struct regmap *regmap,
-                        int alarm_irq, const char *name, bool is_pcf2127)
+                        int alarm_irq, const struct pcf21xx_config *config)
 {
        struct pcf2127 *pcf2127;
        int ret = 0;
@@ -645,6 +1107,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                return -ENOMEM;
 
        pcf2127->regmap = regmap;
+       pcf2127->cfg = config;
 
        dev_set_drvdata(dev, pcf2127);
 
@@ -656,8 +1119,16 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
        pcf2127->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        pcf2127->rtc->range_max = RTC_TIMESTAMP_END_2099;
        pcf2127->rtc->set_start_time = true; /* Sets actual start to 1970 */
-       set_bit(RTC_FEATURE_ALARM_RES_2S, pcf2127->rtc->features);
-       clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, pcf2127->rtc->features);
+
+       /*
+        * PCF2127/29 do not work correctly when setting alarms at 1s intervals.
+        * PCF2131 is ok.
+        */
+       if (pcf2127->cfg->type == PCF2127 || pcf2127->cfg->type == PCF2129) {
+               set_bit(RTC_FEATURE_ALARM_RES_2S, pcf2127->rtc->features);
+               clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, pcf2127->rtc->features);
+       }
+
        clear_bit(RTC_FEATURE_ALARM, pcf2127->rtc->features);
 
        if (alarm_irq > 0) {
@@ -688,7 +1159,16 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                set_bit(RTC_FEATURE_ALARM, pcf2127->rtc->features);
        }
 
-       if (is_pcf2127) {
+       if (pcf2127->cfg->has_int_a_b) {
+               /* Configure int A/B pins, independently of alarm_irq. */
+               ret = pcf2127_configure_interrupt_pins(dev);
+               if (ret) {
+                       dev_err(dev, "failed to configure interrupt pins\n");
+                       return ret;
+               }
+       }
+
+       if (pcf2127->cfg->has_nvmem) {
                struct nvmem_config nvmem_cfg = {
                        .priv = pcf2127,
                        .reg_read = pcf2127_nvmem_read,
@@ -703,15 +1183,17 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
         * The "Power-On Reset Override" facility prevents the RTC to do a reset
         * after power on. For normal operation the PORO must be disabled.
         */
-       regmap_clear_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+       ret = regmap_clear_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
                                PCF2127_BIT_CTRL1_POR_OVRD);
+       if (ret < 0)
+               return ret;
 
-       ret = regmap_read(pcf2127->regmap, PCF2127_REG_CLKOUT, &val);
+       ret = regmap_read(pcf2127->regmap, pcf2127->cfg->reg_clkout, &val);
        if (ret < 0)
                return ret;
 
        if (!(val & PCF2127_BIT_CLKOUT_OTPR)) {
-               ret = regmap_set_bits(pcf2127->regmap, PCF2127_REG_CLKOUT,
+               ret = regmap_set_bits(pcf2127->regmap, pcf2127->cfg->reg_clkout,
                                      PCF2127_BIT_CLKOUT_OTPR);
                if (ret < 0)
                        return ret;
@@ -721,20 +1203,20 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
 
        /*
         * Watchdog timer enabled and reset pin /RST activated when timed out.
-        * Select 1Hz clock source for watchdog timer.
+        * Select 1Hz clock source for watchdog timer (1/4Hz for PCF2131).
         * Note: Countdown timer disabled and not available.
-        * For pca2129, pcf2129, only bit[7] is for Symbol WD_CD
+        * For pca2129, pcf2129 and pcf2131, only bit[7] is for Symbol WD_CD
         * of register watchdg_tim_ctl. The bit[6] is labeled
         * as T. Bits labeled as T must always be written with
         * logic 0.
         */
-       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_WD_CTL,
+       ret = regmap_update_bits(pcf2127->regmap, pcf2127->cfg->reg_wd_ctl,
                                 PCF2127_BIT_WD_CTL_CD1 |
                                 PCF2127_BIT_WD_CTL_CD0 |
                                 PCF2127_BIT_WD_CTL_TF1 |
                                 PCF2127_BIT_WD_CTL_TF0,
                                 PCF2127_BIT_WD_CTL_CD1 |
-                                (is_pcf2127 ? PCF2127_BIT_WD_CTL_CD0 : 0) |
+                                (pcf2127->cfg->has_bit_wd_ctl_cd0 ? PCF2127_BIT_WD_CTL_CD0 : 0) |
                                 PCF2127_BIT_WD_CTL_TF1);
        if (ret) {
                dev_err(dev, "%s: watchdog config (wd_ctl) failed\n", __func__);
@@ -760,34 +1242,15 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
        }
 
        /*
-        * Enable timestamp function and store timestamp of first trigger
-        * event until TSF1 and TSF2 interrupt flags are cleared.
-        */
-       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_TS_CTRL,
-                                PCF2127_BIT_TS_CTRL_TSOFF |
-                                PCF2127_BIT_TS_CTRL_TSM,
-                                PCF2127_BIT_TS_CTRL_TSM);
-       if (ret) {
-               dev_err(dev, "%s: tamper detection config (ts_ctrl) failed\n",
-                       __func__);
-               return ret;
-       }
-
-       /*
-        * Enable interrupt generation when TSF1 or TSF2 timestamp flags
-        * are set. Interrupt signal is an open-drain output and can be
-        * left floating if unused.
+        * Enable timestamp functions 1 to 4.
         */
-       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2,
-                                PCF2127_BIT_CTRL2_TSIE,
-                                PCF2127_BIT_CTRL2_TSIE);
-       if (ret) {
-               dev_err(dev, "%s: tamper detection config (ctrl2) failed\n",
-                       __func__);
-               return ret;
+       for (int i = 0; i < pcf2127->cfg->ts_count; i++) {
+               ret = pcf2127_enable_ts(dev, i);
+               if (ret)
+                       return ret;
        }
 
-       ret = rtc_add_group(pcf2127->rtc, &pcf2127_attr_group);
+       ret = rtc_add_group(pcf2127->rtc, &pcf2127->cfg->attribute_group);
        if (ret) {
                dev_err(dev, "%s: tamper sysfs registering failed\n",
                        __func__);
@@ -799,9 +1262,10 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
 
 #ifdef CONFIG_OF
 static const struct of_device_id pcf2127_of_match[] = {
-       { .compatible = "nxp,pcf2127" },
-       { .compatible = "nxp,pcf2129" },
-       { .compatible = "nxp,pca2129" },
+       { .compatible = "nxp,pcf2127", .data = &pcf21xx_cfg[PCF2127] },
+       { .compatible = "nxp,pcf2129", .data = &pcf21xx_cfg[PCF2129] },
+       { .compatible = "nxp,pca2129", .data = &pcf21xx_cfg[PCF2129] },
+       { .compatible = "nxp,pcf2131", .data = &pcf21xx_cfg[PCF2131] },
        {}
 };
 MODULE_DEVICE_TABLE(of, pcf2127_of_match);
@@ -886,26 +1350,41 @@ static const struct regmap_bus pcf2127_i2c_regmap = {
 static struct i2c_driver pcf2127_i2c_driver;
 
 static const struct i2c_device_id pcf2127_i2c_id[] = {
-       { "pcf2127", 1 },
-       { "pcf2129", 0 },
-       { "pca2129", 0 },
+       { "pcf2127", PCF2127 },
+       { "pcf2129", PCF2129 },
+       { "pca2129", PCF2129 },
+       { "pcf2131", PCF2131 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id);
 
 static int pcf2127_i2c_probe(struct i2c_client *client)
 {
-       const struct i2c_device_id *id = i2c_match_id(pcf2127_i2c_id, client);
        struct regmap *regmap;
-       static const struct regmap_config config = {
+       static struct regmap_config config = {
                .reg_bits = 8,
                .val_bits = 8,
-               .max_register = 0x1d,
        };
+       const struct pcf21xx_config *variant;
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
                return -ENODEV;
 
+       if (client->dev.of_node) {
+               variant = of_device_get_match_data(&client->dev);
+               if (!variant)
+                       return -ENODEV;
+       } else {
+               enum pcf21xx_type type =
+                       i2c_match_id(pcf2127_i2c_id, client)->driver_data;
+
+               if (type >= PCF21XX_LAST_ID)
+                       return -ENODEV;
+               variant = &pcf21xx_cfg[type];
+       }
+
+       config.max_register = variant->max_register,
+
        regmap = devm_regmap_init(&client->dev, &pcf2127_i2c_regmap,
                                        &client->dev, &config);
        if (IS_ERR(regmap)) {
@@ -914,8 +1393,7 @@ static int pcf2127_i2c_probe(struct i2c_client *client)
                return PTR_ERR(regmap);
        }
 
-       return pcf2127_probe(&client->dev, regmap, client->irq,
-                            pcf2127_i2c_driver.driver.name, id->driver_data);
+       return pcf2127_probe(&client->dev, regmap, client->irq, variant);
 }
 
 static struct i2c_driver pcf2127_i2c_driver = {
@@ -953,17 +1431,32 @@ static void pcf2127_i2c_unregister_driver(void)
 #if IS_ENABLED(CONFIG_SPI_MASTER)
 
 static struct spi_driver pcf2127_spi_driver;
+static const struct spi_device_id pcf2127_spi_id[];
 
 static int pcf2127_spi_probe(struct spi_device *spi)
 {
-       static const struct regmap_config config = {
+       static struct regmap_config config = {
                .reg_bits = 8,
                .val_bits = 8,
                .read_flag_mask = 0xa0,
                .write_flag_mask = 0x20,
-               .max_register = 0x1d,
        };
        struct regmap *regmap;
+       const struct pcf21xx_config *variant;
+
+       if (spi->dev.of_node) {
+               variant = of_device_get_match_data(&spi->dev);
+               if (!variant)
+                       return -ENODEV;
+       } else {
+               enum pcf21xx_type type = spi_get_device_id(spi)->driver_data;
+
+               if (type >= PCF21XX_LAST_ID)
+                       return -ENODEV;
+               variant = &pcf21xx_cfg[type];
+       }
+
+       config.max_register = variant->max_register,
 
        regmap = devm_regmap_init_spi(spi, &config);
        if (IS_ERR(regmap)) {
@@ -972,15 +1465,14 @@ static int pcf2127_spi_probe(struct spi_device *spi)
                return PTR_ERR(regmap);
        }
 
-       return pcf2127_probe(&spi->dev, regmap, spi->irq,
-                            pcf2127_spi_driver.driver.name,
-                            spi_get_device_id(spi)->driver_data);
+       return pcf2127_probe(&spi->dev, regmap, spi->irq, variant);
 }
 
 static const struct spi_device_id pcf2127_spi_id[] = {
-       { "pcf2127", 1 },
-       { "pcf2129", 0 },
-       { "pca2129", 0 },
+       { "pcf2127", PCF2127 },
+       { "pcf2129", PCF2129 },
+       { "pca2129", PCF2129 },
+       { "pcf2131", PCF2131 },
        { }
 };
 MODULE_DEVICE_TABLE(spi, pcf2127_spi_id);
@@ -1045,5 +1537,5 @@ static void __exit pcf2127_exit(void)
 module_exit(pcf2127_exit)
 
 MODULE_AUTHOR("Renaud Cerrato <r.cerrato@til-technologies.fr>");
-MODULE_DESCRIPTION("NXP PCF2127/29 RTC driver");
+MODULE_DESCRIPTION("NXP PCF2127/29/31 RTC driver");
 MODULE_LICENSE("GPL v2");
index e517abf..fdbc07f 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/bcd.h>
 #include <linux/rtc.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pm_wakeirq.h>
 #include <linux/regmap.h>
 
@@ -514,49 +514,40 @@ static struct clk *pcf85063_clkout_register_clk(struct pcf85063 *pcf85063)
 }
 #endif
 
-enum pcf85063_type {
-       PCF85063,
-       PCF85063TP,
-       PCF85063A,
-       RV8263,
-       PCF85063_LAST_ID
+static const struct pcf85063_config config_pcf85063 = {
+       .regmap = {
+               .reg_bits = 8,
+               .val_bits = 8,
+               .max_register = 0x0a,
+       },
 };
 
-static struct pcf85063_config pcf85063_cfg[] = {
-       [PCF85063] = {
-               .regmap = {
-                       .reg_bits = 8,
-                       .val_bits = 8,
-                       .max_register = 0x0a,
-               },
-       },
-       [PCF85063TP] = {
-               .regmap = {
-                       .reg_bits = 8,
-                       .val_bits = 8,
-                       .max_register = 0x0a,
-               },
-       },
-       [PCF85063A] = {
-               .regmap = {
-                       .reg_bits = 8,
-                       .val_bits = 8,
-                       .max_register = 0x11,
-               },
-               .has_alarms = 1,
+static const struct pcf85063_config config_pcf85063tp = {
+       .regmap = {
+               .reg_bits = 8,
+               .val_bits = 8,
+               .max_register = 0x0a,
        },
-       [RV8263] = {
-               .regmap = {
-                       .reg_bits = 8,
-                       .val_bits = 8,
-                       .max_register = 0x11,
-               },
-               .has_alarms = 1,
-               .force_cap_7000 = 1,
+};
+
+static const struct pcf85063_config config_pcf85063a = {
+       .regmap = {
+               .reg_bits = 8,
+               .val_bits = 8,
+               .max_register = 0x11,
        },
+       .has_alarms = 1,
 };
 
-static const struct i2c_device_id pcf85063_ids[];
+static const struct pcf85063_config config_rv8263 = {
+       .regmap = {
+               .reg_bits = 8,
+               .val_bits = 8,
+               .max_register = 0x11,
+       },
+       .has_alarms = 1,
+       .force_cap_7000 = 1,
+};
 
 static int pcf85063_probe(struct i2c_client *client)
 {
@@ -579,17 +570,9 @@ static int pcf85063_probe(struct i2c_client *client)
        if (!pcf85063)
                return -ENOMEM;
 
-       if (client->dev.of_node) {
-               config = of_device_get_match_data(&client->dev);
-               if (!config)
-                       return -ENODEV;
-       } else {
-               enum pcf85063_type type =
-                       i2c_match_id(pcf85063_ids, client)->driver_data;
-               if (type >= PCF85063_LAST_ID)
-                       return -ENODEV;
-               config = &pcf85063_cfg[type];
-       }
+       config = i2c_get_match_data(client);
+       if (!config)
+               return -ENODEV;
 
        pcf85063->regmap = devm_regmap_init_i2c(client, &config->regmap);
        if (IS_ERR(pcf85063->regmap))
@@ -655,22 +638,22 @@ static int pcf85063_probe(struct i2c_client *client)
 }
 
 static const struct i2c_device_id pcf85063_ids[] = {
-       { "pca85073a", PCF85063A },
-       { "pcf85063", PCF85063 },
-       { "pcf85063tp", PCF85063TP },
-       { "pcf85063a", PCF85063A },
-       { "rv8263", RV8263 },
+       { "pca85073a", .driver_data = (kernel_ulong_t)&config_pcf85063a },
+       { "pcf85063", .driver_data = (kernel_ulong_t)&config_pcf85063 },
+       { "pcf85063tp", .driver_data = (kernel_ulong_t)&config_pcf85063tp },
+       { "pcf85063a", .driver_data = (kernel_ulong_t)&config_pcf85063a },
+       { "rv8263", .driver_data = (kernel_ulong_t)&config_rv8263 },
        {}
 };
 MODULE_DEVICE_TABLE(i2c, pcf85063_ids);
 
 #ifdef CONFIG_OF
 static const struct of_device_id pcf85063_of_match[] = {
-       { .compatible = "nxp,pca85073a", .data = &pcf85063_cfg[PCF85063A] },
-       { .compatible = "nxp,pcf85063", .data = &pcf85063_cfg[PCF85063] },
-       { .compatible = "nxp,pcf85063tp", .data = &pcf85063_cfg[PCF85063TP] },
-       { .compatible = "nxp,pcf85063a", .data = &pcf85063_cfg[PCF85063A] },
-       { .compatible = "microcrystal,rv8263", .data = &pcf85063_cfg[RV8263] },
+       { .compatible = "nxp,pca85073a", .data = &config_pcf85063a },
+       { .compatible = "nxp,pcf85063", .data = &config_pcf85063 },
+       { .compatible = "nxp,pcf85063tp", .data = &config_pcf85063tp },
+       { .compatible = "nxp,pcf85063a", .data = &config_pcf85063a },
+       { .compatible = "microcrystal,rv8263", .data = &config_rv8263 },
        {}
 };
 MODULE_DEVICE_TABLE(of, pcf85063_of_match);
index 65b8b13..0619467 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/errno.h>
 #include <linux/bcd.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 
 /*
@@ -403,6 +402,7 @@ static int pcf85363_probe(struct i2c_client *client)
                },
        };
        int ret, i, err;
+       bool wakeup_source;
 
        if (data)
                config = data;
@@ -432,25 +432,36 @@ static int pcf85363_probe(struct i2c_client *client)
        pcf85363->rtc->ops = &rtc_ops;
        pcf85363->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        pcf85363->rtc->range_max = RTC_TIMESTAMP_END_2099;
-       clear_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
+
+       wakeup_source = device_property_read_bool(&client->dev,
+                                                 "wakeup-source");
+       if (client->irq > 0 || wakeup_source) {
+               regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
+               regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
+                                  PIN_IO_INTA_OUT, PIN_IO_INTAPM);
+       }
 
        if (client->irq > 0) {
                unsigned long irqflags = IRQF_TRIGGER_LOW;
 
                if (dev_fwnode(&client->dev))
                        irqflags = 0;
-
-               regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
-               regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
-                                  PIN_IO_INTA_OUT, PIN_IO_INTAPM);
                ret = devm_request_threaded_irq(&client->dev, client->irq,
                                                NULL, pcf85363_rtc_handle_irq,
                                                irqflags | IRQF_ONESHOT,
                                                "pcf85363", client);
-               if (ret)
-                       dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
-               else
-                       set_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
+               if (ret) {
+                       dev_warn(&client->dev,
+                                "unable to request IRQ, alarms disabled\n");
+                       client->irq = 0;
+               }
+       }
+
+       if (client->irq > 0 || wakeup_source) {
+               device_init_wakeup(&client->dev, true);
+               set_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
+       } else {
+               clear_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
        }
 
        ret = devm_rtc_register_device(pcf85363->rtc);
index eeacf48..e400c78 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #include "rtc-sa1100.h"
 
index a5a6c87..f8fab02 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/bcd.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 /*
  * Ricoh has a family of I2C based RTCs, which differ only slightly from
@@ -826,8 +826,7 @@ static int rs5c372_probe(struct i2c_client *client)
        rs5c372->client = client;
        i2c_set_clientdata(client, rs5c372);
        if (client->dev.of_node) {
-               rs5c372->type = (enum rtc_type)
-                       of_device_get_match_data(&client->dev);
+               rs5c372->type = (uintptr_t)of_device_get_match_data(&client->dev);
        } else {
                const struct i2c_device_id *id = i2c_match_id(rs5c372_id, client);
                rs5c372->type = id->driver_data;
index 076e56f..2f001c5 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 
@@ -855,11 +855,68 @@ static const struct regmap_config regmap_config = {
         .max_register = 0x37,
 };
 
+/*
+ * Set the RV3028_BACKUP trickle charger bits from the firmware properties.
+ * Returns 0 or the callee's error code, as int so negatives are not truncated.
+ */
+static int rv3028_set_trickle_charger(struct rv3028_data *rv3028,
+                                     struct i2c_client *client)
+{
+       unsigned int val_old, val, i;
+       u32 ohms, chargeable;
+       int ret;
+
+       ret = regmap_read(rv3028->regmap, RV3028_BACKUP, &val_old);
+       if (ret < 0)
+               return ret;
+
+       /* mask out only trickle charger bits */
+       val_old &= RV3028_BACKUP_TCE | RV3028_BACKUP_TCR_MASK;
+       val = val_old;
+
+       /* setup trickle charger */
+       if (!device_property_read_u32(&client->dev, "trickle-resistor-ohms",
+                                     &ohms)) {
+               for (i = 0; i < ARRAY_SIZE(rv3028_trickle_resistors); i++)
+                       if (ohms == rv3028_trickle_resistors[i])
+                               break;
+
+               if (i < ARRAY_SIZE(rv3028_trickle_resistors)) {
+                       /* enable trickle charger and its resistor */
+                       val = RV3028_BACKUP_TCE | i;
+               } else {
+                       dev_warn(&client->dev, "invalid trickle resistor value\n");
+               }
+       }
+
+       if (!device_property_read_u32(&client->dev, "aux-voltage-chargeable",
+                                     &chargeable)) {
+               switch (chargeable) {
+               case 0:
+                       val &= ~RV3028_BACKUP_TCE;
+                       break;
+               case 1:
+                       val |= RV3028_BACKUP_TCE;
+                       break;
+               default:
+                       dev_warn(&client->dev,
+                                "unsupported aux-voltage-chargeable value\n");
+                       break;
+               }
+       }
+
+       /* only update EEPROM if changes are necessary */
+       if (val_old == val)
+               return 0;
+
+       return rv3028_update_cfg(rv3028, RV3028_BACKUP, RV3028_BACKUP_TCE |
+                                               RV3028_BACKUP_TCR_MASK, val);
+}
+
 static int rv3028_probe(struct i2c_client *client)
 {
        struct rv3028_data *rv3028;
        int ret, status;
-       u32 ohms;
        struct nvmem_config nvmem_cfg = {
                .name = "rv3028_nvram",
                .word_size = 1,
@@ -937,24 +994,9 @@ static int rv3028_probe(struct i2c_client *client)
        if (ret)
                return ret;
 
-       /* setup trickle charger */
-       if (!device_property_read_u32(&client->dev, "trickle-resistor-ohms",
-                                     &ohms)) {
-               int i;
-
-               for (i = 0; i < ARRAY_SIZE(rv3028_trickle_resistors); i++)
-                       if (ohms == rv3028_trickle_resistors[i])
-                               break;
-
-               if (i < ARRAY_SIZE(rv3028_trickle_resistors)) {
-                       ret = rv3028_update_cfg(rv3028, RV3028_BACKUP, RV3028_BACKUP_TCE |
-                                                RV3028_BACKUP_TCR_MASK, RV3028_BACKUP_TCE | i);
-                       if (ret)
-                               return ret;
-               } else {
-                       dev_warn(&client->dev, "invalid trickle resistor value\n");
-               }
-       }
+       ret = rv3028_set_trickle_charger(rv3028, client);
+       if (ret)
+               return ret;
 
        ret = rtc_add_group(rv3028->rtc, &rv3028_attr_group);
        if (ret)
index 6b8eb20..35b2e36 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 
index 98679ca..1a3ec1b 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/rtc.h>
 
 #define RV8803_I2C_TRY_COUNT           4
@@ -645,8 +645,7 @@ static int rv8803_probe(struct i2c_client *client)
        mutex_init(&rv8803->flags_lock);
        rv8803->client = client;
        if (client->dev.of_node) {
-               rv8803->type = (enum rv8803_type)
-                       of_device_get_match_data(&client->dev);
+               rv8803->type = (uintptr_t)of_device_get_match_data(&client->dev);
        } else {
                const struct i2c_device_id *id = i2c_match_id(rv8803_id, client);
 
index 8702db6..834274d 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/spi/spi.h>
 #include <linux/i2c.h>
 
index 82881fd..48efd61 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/i2c.h>
 #include <linux/bcd.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 #include <linux/log2.h>
index dca736c..56ebbd4 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/rtc.h>
@@ -227,7 +227,7 @@ static int rzn1_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
                return ret;
 
        /* We cannot set alarms more than one week ahead */
-       farest = rtc_tm_to_time64(&tm_now) + (7 * 86400);
+       farest = rtc_tm_to_time64(&tm_now) + rtc->rtcdev->alarm_offset_max;
        alarm = rtc_tm_to_time64(tm);
        if (time_after(alarm, farest))
                return -ERANGE;
@@ -351,6 +351,7 @@ static int rzn1_rtc_probe(struct platform_device *pdev)
 
        rtc->rtcdev->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rtc->rtcdev->range_max = RTC_TIMESTAMP_END_2099;
+       rtc->rtcdev->alarm_offset_max = 7 * 86400;
        rtc->rtcdev->ops = &rzn1_rtc_ops;
        set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtcdev->features);
        clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtcdev->features);
index 70e1a18..2822388 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
 
index 3d36e11..76753c7 100644 (file)
@@ -6,11 +6,13 @@
 
 #include <linux/bcd.h>
 #include <linux/clk.h>
+#include <linux/errno.h>
 #include <linux/iopoll.h>
 #include <linux/ioport.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
@@ -89,6 +91,9 @@
 /* Max STM32 RTC register offset is 0x3FC */
 #define UNDEF_REG                      0xFFFF
 
+/* STM32 RTC driver time helpers */
+#define SEC_PER_DAY            (24 * 60 * 60)
+
 struct stm32_rtc;
 
 struct stm32_rtc_registers {
@@ -114,6 +119,7 @@ struct stm32_rtc_data {
        void (*clear_events)(struct stm32_rtc *rtc, unsigned int flags);
        bool has_pclk;
        bool need_dbp;
+       bool need_accuracy;
 };
 
 struct stm32_rtc {
@@ -158,10 +164,9 @@ static int stm32_rtc_enter_init_mode(struct stm32_rtc *rtc)
                 * slowest rtc_ck frequency may be 32kHz and highest should be
                 * 1MHz, we poll every 10 us with a timeout of 100ms.
                 */
-               return readl_relaxed_poll_timeout_atomic(
-                                       rtc->base + regs->isr,
-                                       isr, (isr & STM32_RTC_ISR_INITF),
-                                       10, 100000);
+               return readl_relaxed_poll_timeout_atomic(rtc->base + regs->isr, isr,
+                                                        (isr & STM32_RTC_ISR_INITF),
+                                                        10, 100000);
        }
 
        return 0;
@@ -425,40 +430,42 @@ static int stm32_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
        return 0;
 }
 
-static int stm32_rtc_valid_alrm(struct stm32_rtc *rtc, struct rtc_time *tm)
+static int stm32_rtc_valid_alrm(struct device *dev, struct rtc_time *tm)
 {
-       const struct stm32_rtc_registers *regs = &rtc->data->regs;
-       int cur_day, cur_mon, cur_year, cur_hour, cur_min, cur_sec;
-       unsigned int dr = readl_relaxed(rtc->base + regs->dr);
-       unsigned int tr = readl_relaxed(rtc->base + regs->tr);
-
-       cur_day = (dr & STM32_RTC_DR_DATE) >> STM32_RTC_DR_DATE_SHIFT;
-       cur_mon = (dr & STM32_RTC_DR_MONTH) >> STM32_RTC_DR_MONTH_SHIFT;
-       cur_year = (dr & STM32_RTC_DR_YEAR) >> STM32_RTC_DR_YEAR_SHIFT;
-       cur_sec = (tr & STM32_RTC_TR_SEC) >> STM32_RTC_TR_SEC_SHIFT;
-       cur_min = (tr & STM32_RTC_TR_MIN) >> STM32_RTC_TR_MIN_SHIFT;
-       cur_hour = (tr & STM32_RTC_TR_HOUR) >> STM32_RTC_TR_HOUR_SHIFT;
+       static struct rtc_time now;
+       time64_t max_alarm_time64;
+       int max_day_forward;
+       int next_month;
+       int next_year;
 
        /*
         * Assuming current date is M-D-Y H:M:S.
         * RTC alarm can't be set on a specific month and year.
         * So the valid alarm range is:
         *      M-D-Y H:M:S < alarm <= (M+1)-D-Y H:M:S
-        * with a specific case for December...
         */
-       if ((((tm->tm_year > cur_year) &&
-             (tm->tm_mon == 0x1) && (cur_mon == 0x12)) ||
-            ((tm->tm_year == cur_year) &&
-             (tm->tm_mon <= cur_mon + 1))) &&
-           ((tm->tm_mday > cur_day) ||
-            ((tm->tm_mday == cur_day) &&
-            ((tm->tm_hour > cur_hour) ||
-             ((tm->tm_hour == cur_hour) && (tm->tm_min > cur_min)) ||
-             ((tm->tm_hour == cur_hour) && (tm->tm_min == cur_min) &&
-              (tm->tm_sec >= cur_sec))))))
-               return 0;
+       stm32_rtc_read_time(dev, &now);
+
+       /*
+        * Find the next month and the year of the next month.
+        * Note: tm_mon and next_month are from 0 to 11
+        */
+       next_month = now.tm_mon + 1;
+       if (next_month == 12) {
+               next_month = 0;
+               next_year = now.tm_year + 1;
+       } else {
+               next_year = now.tm_year;
+       }
 
-       return -EINVAL;
+       /* Find the maximum limit of alarm in days. */
+       max_day_forward = rtc_month_days(now.tm_mon, now.tm_year)
+                        - now.tm_mday
+                        + min(rtc_month_days(next_month, next_year), now.tm_mday);
+
+       /* Convert to timestamp and compare the alarm time and its upper limit */
+       max_alarm_time64 = rtc_tm_to_time64(&now) + max_day_forward * SEC_PER_DAY;
+       return rtc_tm_to_time64(tm) <= max_alarm_time64 ? 0 : -EINVAL;
 }
 
 static int stm32_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -469,17 +476,17 @@ static int stm32_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        unsigned int cr, isr, alrmar;
        int ret = 0;
 
-       tm2bcd(tm);
-
        /*
         * RTC alarm can't be set on a specific date, unless this date is
         * up to the same day of month next month.
         */
-       if (stm32_rtc_valid_alrm(rtc, tm) < 0) {
+       if (stm32_rtc_valid_alrm(dev, tm) < 0) {
                dev_err(dev, "Alarm can be set only on upcoming month.\n");
                return -EINVAL;
        }
 
+       tm2bcd(tm);
+
        alrmar = 0;
        /* tm_year and tm_mon are not used because not supported by RTC */
        alrmar |= (tm->tm_mday << STM32_RTC_ALRMXR_DATE_SHIFT) &
@@ -545,6 +552,7 @@ static void stm32_rtc_clear_events(struct stm32_rtc *rtc,
 static const struct stm32_rtc_data stm32_rtc_data = {
        .has_pclk = false,
        .need_dbp = true,
+       .need_accuracy = false,
        .regs = {
                .tr = 0x00,
                .dr = 0x04,
@@ -566,6 +574,7 @@ static const struct stm32_rtc_data stm32_rtc_data = {
 static const struct stm32_rtc_data stm32h7_rtc_data = {
        .has_pclk = true,
        .need_dbp = true,
+       .need_accuracy = false,
        .regs = {
                .tr = 0x00,
                .dr = 0x04,
@@ -596,6 +605,7 @@ static void stm32mp1_rtc_clear_events(struct stm32_rtc *rtc,
 static const struct stm32_rtc_data stm32mp1_data = {
        .has_pclk = true,
        .need_dbp = false,
+       .need_accuracy = true,
        .regs = {
                .tr = 0x00,
                .dr = 0x04,
@@ -628,7 +638,7 @@ static int stm32_rtc_init(struct platform_device *pdev,
        const struct stm32_rtc_registers *regs = &rtc->data->regs;
        unsigned int prer, pred_a, pred_s, pred_a_max, pred_s_max, cr;
        unsigned int rate;
-       int ret = 0;
+       int ret;
 
        rate = clk_get_rate(rtc->rtc_ck);
 
@@ -636,18 +646,32 @@ static int stm32_rtc_init(struct platform_device *pdev,
        pred_a_max = STM32_RTC_PRER_PRED_A >> STM32_RTC_PRER_PRED_A_SHIFT;
        pred_s_max = STM32_RTC_PRER_PRED_S >> STM32_RTC_PRER_PRED_S_SHIFT;
 
-       for (pred_a = pred_a_max; pred_a + 1 > 0; pred_a--) {
-               pred_s = (rate / (pred_a + 1)) - 1;
+       if (rate > (pred_a_max + 1) * (pred_s_max + 1)) {
+               dev_err(&pdev->dev, "rtc_ck rate is too high: %dHz\n", rate);
+               return -EINVAL;
+       }
+
+       if (rtc->data->need_accuracy) {
+               for (pred_a = 0; pred_a <= pred_a_max; pred_a++) {
+                       pred_s = (rate / (pred_a + 1)) - 1;
+
+                       if (pred_s <= pred_s_max && ((pred_s + 1) * (pred_a + 1)) == rate)
+                               break;
+               }
+       } else {
+               for (pred_a = pred_a_max; pred_a + 1 > 0; pred_a--) {
+                       pred_s = (rate / (pred_a + 1)) - 1;
 
-               if (((pred_s + 1) * (pred_a + 1)) == rate)
-                       break;
+                       if (((pred_s + 1) * (pred_a + 1)) == rate)
+                               break;
+               }
        }
 
        /*
         * Can't find a 1Hz, so give priority to RTC power consumption
         * by choosing the higher possible value for prediv_a
         */
-       if ((pred_s > pred_s_max) || (pred_a > pred_a_max)) {
+       if (pred_s > pred_s_max || pred_a > pred_a_max) {
                pred_a = pred_a_max;
                pred_s = (rate / (pred_a + 1)) - 1;
 
@@ -656,6 +680,20 @@ static int stm32_rtc_init(struct platform_device *pdev,
                         "fast" : "slow");
        }
 
+       cr = readl_relaxed(rtc->base + regs->cr);
+
+       prer = readl_relaxed(rtc->base + regs->prer);
+       prer &= STM32_RTC_PRER_PRED_S | STM32_RTC_PRER_PRED_A;
+
+       pred_s = (pred_s << STM32_RTC_PRER_PRED_S_SHIFT) &
+                STM32_RTC_PRER_PRED_S;
+       pred_a = (pred_a << STM32_RTC_PRER_PRED_A_SHIFT) &
+                STM32_RTC_PRER_PRED_A;
+
+       /* quit if there is nothing to initialize */
+       if ((cr & STM32_RTC_CR_FMT) == 0 && prer == (pred_s | pred_a))
+               return 0;
+
        stm32_rtc_wpr_unlock(rtc);
 
        ret = stm32_rtc_enter_init_mode(rtc);
@@ -665,13 +703,10 @@ static int stm32_rtc_init(struct platform_device *pdev,
                goto end;
        }
 
-       prer = (pred_s << STM32_RTC_PRER_PRED_S_SHIFT) & STM32_RTC_PRER_PRED_S;
-       writel_relaxed(prer, rtc->base + regs->prer);
-       prer |= (pred_a << STM32_RTC_PRER_PRED_A_SHIFT) & STM32_RTC_PRER_PRED_A;
-       writel_relaxed(prer, rtc->base + regs->prer);
+       writel_relaxed(pred_s, rtc->base + regs->prer);
+       writel_relaxed(pred_a | pred_s, rtc->base + regs->prer);
 
        /* Force 24h time format */
-       cr = readl_relaxed(rtc->base + regs->cr);
        cr &= ~STM32_RTC_CR_FMT;
        writel_relaxed(cr, rtc->base + regs->cr);
 
@@ -730,16 +765,13 @@ static int stm32_rtc_probe(struct platform_device *pdev)
                rtc->rtc_ck = devm_clk_get(&pdev->dev, NULL);
        } else {
                rtc->pclk = devm_clk_get(&pdev->dev, "pclk");
-               if (IS_ERR(rtc->pclk)) {
-                       dev_err(&pdev->dev, "no pclk clock");
-                       return PTR_ERR(rtc->pclk);
-               }
+               if (IS_ERR(rtc->pclk))
+                       return dev_err_probe(&pdev->dev, PTR_ERR(rtc->pclk), "no pclk clock");
+
                rtc->rtc_ck = devm_clk_get(&pdev->dev, "rtc_ck");
        }
-       if (IS_ERR(rtc->rtc_ck)) {
-               dev_err(&pdev->dev, "no rtc_ck clock");
-               return PTR_ERR(rtc->rtc_ck);
-       }
+       if (IS_ERR(rtc->rtc_ck))
+               return dev_err_probe(&pdev->dev, PTR_ERR(rtc->rtc_ck), "no rtc_ck clock");
 
        if (rtc->data->has_pclk) {
                ret = clk_prepare_enable(rtc->pclk);
@@ -859,7 +891,6 @@ static void stm32_rtc_remove(struct platform_device *pdev)
        device_init_wakeup(&pdev->dev, false);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int stm32_rtc_suspend(struct device *dev)
 {
        struct stm32_rtc *rtc = dev_get_drvdata(dev);
@@ -890,10 +921,10 @@ static int stm32_rtc_resume(struct device *dev)
 
        return ret;
 }
-#endif
 
-static SIMPLE_DEV_PM_OPS(stm32_rtc_pm_ops,
-                        stm32_rtc_suspend, stm32_rtc_resume);
+static const struct dev_pm_ops stm32_rtc_pm_ops = {
+       NOIRQ_SYSTEM_SLEEP_PM_OPS(stm32_rtc_suspend, stm32_rtc_resume)
+};
 
 static struct platform_driver stm32_rtc_driver = {
        .probe          = stm32_rtc_probe,
index 6f11b74..7566d0a 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/delay.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/stmp_device.h>
 #include <linux/stmp3xxx_rtc_wdt.h>
index 71548dd..8e0c669 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -847,8 +846,6 @@ static int sun6i_rtc_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       dev_info(&pdev->dev, "RTC enabled\n");
-
        return 0;
 }
 
index f33dc30..20c7e97 100644 (file)
@@ -244,7 +244,7 @@ static int sp_rtc_probe(struct platform_device *plat_dev)
 
        sp_rtc->irq = platform_get_irq(plat_dev, 0);
        if (sp_rtc->irq < 0)
-               return dev_err_probe(&plat_dev->dev, sp_rtc->irq, "platform_get_irq failed\n");
+               return sp_rtc->irq;
 
        ret = devm_request_irq(&plat_dev->dev, sp_rtc->irq, sp_rtc_irq_handler,
                               IRQF_TRIGGER_RISING, "rtc irq", plat_dev);
index 5d019e3..5cab995 100644 (file)
@@ -14,8 +14,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/types.h>
index 0d90fe9..ec759d8 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/delay.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/sys_soc.h>
 #include <linux/property.h>
index 9f14e24..20faf08 100644 (file)
@@ -252,6 +252,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev)
 
        rtc->rtc->ops = &tps6586x_rtc_ops;
        rtc->rtc->range_max = (1ULL << 30) - 1; /* 30-bit seconds */
+       rtc->rtc->alarm_offset_max = ALM1_VALID_RANGE_IN_SEC;
        rtc->rtc->start_secs = mktime64(2009, 1, 1, 0, 0, 0);
        rtc->rtc->set_start_time = true;
 
index 75e4c2d..411ff66 100644 (file)
@@ -406,11 +406,8 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, tps_rtc);
 
        irq  = platform_get_irq(pdev, 0);
-       if (irq <= 0) {
-               dev_warn(&pdev->dev, "Wake up is not possible as irq = %d\n",
-                       irq);
-               return -ENXIO;
-       }
+       if (irq < 0)
+               return irq;
 
        ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
                tps65910_rtc_interrupt, IRQF_TRIGGER_LOW,
index 81b3694..13f8ce0 100644 (file)
@@ -487,11 +487,24 @@ static const struct rtc_class_ops twl_rtc_ops = {
        .alarm_irq_enable = twl_rtc_alarm_irq_enable,
 };
 
+static int twl_nvram_read(void *priv, unsigned int offset, void *val,
+                         size_t bytes)
+{
+       return twl_i2c_read((long)priv, val, offset, bytes);
+}
+
+static int twl_nvram_write(void *priv, unsigned int offset, void *val,
+                          size_t bytes)
+{
+       return twl_i2c_write((long)priv, val, offset, bytes);
+}
+
 /*----------------------------------------------------------------------*/
 
 static int twl_rtc_probe(struct platform_device *pdev)
 {
        struct twl_rtc *twl_rtc;
+       struct nvmem_config nvmem_cfg;
        struct device_node *np = pdev->dev.of_node;
        int ret = -EINVAL;
        int irq = platform_get_irq(pdev, 0);
@@ -542,7 +555,6 @@ static int twl_rtc_probe(struct platform_device *pdev)
                        REG_INT_MSK_STS_A);
        }
 
-       dev_info(&pdev->dev, "Enabling TWL-RTC\n");
        ret = twl_rtc_write_u8(twl_rtc, BIT_RTC_CTRL_REG_STOP_RTC_M,
                               REG_RTC_CTRL_REG);
        if (ret < 0)
@@ -564,11 +576,8 @@ static int twl_rtc_probe(struct platform_device *pdev)
 
        twl_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
                                        &twl_rtc_ops, THIS_MODULE);
-       if (IS_ERR(twl_rtc->rtc)) {
-               dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
-                       PTR_ERR(twl_rtc->rtc));
+       if (IS_ERR(twl_rtc->rtc))
                return PTR_ERR(twl_rtc->rtc);
-       }
 
        ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
                                        twl_rtc_interrupt,
@@ -579,6 +588,30 @@ static int twl_rtc_probe(struct platform_device *pdev)
                return ret;
        }
 
+       memset(&nvmem_cfg, 0, sizeof(nvmem_cfg));
+       nvmem_cfg.name = "twl-secured-";
+       nvmem_cfg.type = NVMEM_TYPE_BATTERY_BACKED;
+       nvmem_cfg.reg_read = twl_nvram_read,
+       nvmem_cfg.reg_write = twl_nvram_write,
+       nvmem_cfg.word_size = 1;
+       nvmem_cfg.stride = 1;
+       if (twl_class_is_4030()) {
+               /* 20 bytes SECURED_REG area */
+               nvmem_cfg.size = 20;
+               nvmem_cfg.priv = (void *)TWL_MODULE_SECURED_REG;
+               devm_rtc_nvmem_register(twl_rtc->rtc, &nvmem_cfg);
+               /* 8 bytes BACKUP area */
+               nvmem_cfg.name = "twl-backup-";
+               nvmem_cfg.size = 8;
+               nvmem_cfg.priv = (void *)TWL4030_MODULE_BACKUP;
+               devm_rtc_nvmem_register(twl_rtc->rtc, &nvmem_cfg);
+       } else {
+               /* 8 bytes SECURED_REG area */
+               nvmem_cfg.size = 8;
+               nvmem_cfg.priv = (void *)TWL_MODULE_SECURED_REG;
+               devm_rtc_nvmem_register(twl_rtc->rtc, &nvmem_cfg);
+       }
+
        return 0;
 }
 
index 947f807..3c773cf 100644 (file)
@@ -386,8 +386,6 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
        /* enable the RTC if it's not already enabled */
        power5 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_5);
        if (!(power5 &  WM8350_RTC_TICK_ENA)) {
-               dev_info(wm8350->dev, "Starting RTC\n");
-
                wm8350_reg_unlock(wm8350);
 
                ret = wm8350_set_bits(wm8350, WM8350_POWER_MGMT_5,
@@ -426,11 +424,8 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
 
        wm_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm8350",
                                        &wm8350_rtc_ops, THIS_MODULE);
-       if (IS_ERR(wm_rtc->rtc)) {
-               ret = PTR_ERR(wm_rtc->rtc);
-               dev_err(&pdev->dev, "failed to register RTC: %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(wm_rtc->rtc))
+               return PTR_ERR(wm_rtc->rtc);
 
        ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC,
                            wm8350_rtc_update_handler, 0,
index 06bcb6c..4b7ecd4 100644 (file)
@@ -411,13 +411,13 @@ removeseg:
                        segment_unload(entry->segment_name);
        }
        list_del(&dev_info->lh);
+       up_write(&dcssblk_devices_sem);
 
        dax_remove_host(dev_info->gd);
        kill_dax(dev_info->dax_dev);
        put_dax(dev_info->dax_dev);
        del_gendisk(dev_info->gd);
        put_disk(dev_info->gd);
-       up_write(&dcssblk_devices_sem);
 
        if (device_remove_file_self(dev, attr)) {
                device_unregister(dev);
@@ -790,18 +790,17 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
        }
 
        list_del(&dev_info->lh);
+       /* unload all related segments */
+       list_for_each_entry(entry, &dev_info->seg_list, lh)
+               segment_unload(entry->segment_name);
+       up_write(&dcssblk_devices_sem);
+
        dax_remove_host(dev_info->gd);
        kill_dax(dev_info->dax_dev);
        put_dax(dev_info->dax_dev);
        del_gendisk(dev_info->gd);
        put_disk(dev_info->gd);
 
-       /* unload all related segments */
-       list_for_each_entry(entry, &dev_info->seg_list, lh)
-               segment_unload(entry->segment_name);
-
-       up_write(&dcssblk_devices_sem);
-
        device_unregister(&dev_info->dev);
        put_device(&dev_info->dev);
 
index 9fa92e4..7207a7f 100644 (file)
@@ -111,7 +111,7 @@ static inline unsigned long mon_mca_end(struct mon_msg *monmsg)
 
 static inline u8 mon_mca_type(struct mon_msg *monmsg, u8 index)
 {
-       return *((u8 *) mon_mca_start(monmsg) + monmsg->mca_offset + index);
+       return *((u8 *)__va(mon_mca_start(monmsg)) + monmsg->mca_offset + index);
 }
 
 static inline u32 mon_mca_size(struct mon_msg *monmsg)
@@ -121,12 +121,12 @@ static inline u32 mon_mca_size(struct mon_msg *monmsg)
 
 static inline u32 mon_rec_start(struct mon_msg *monmsg)
 {
-       return *((u32 *) (mon_mca_start(monmsg) + monmsg->mca_offset + 4));
+       return *((u32 *)(__va(mon_mca_start(monmsg)) + monmsg->mca_offset + 4));
 }
 
 static inline u32 mon_rec_end(struct mon_msg *monmsg)
 {
-       return *((u32 *) (mon_mca_start(monmsg) + monmsg->mca_offset + 8));
+       return *((u32 *)(__va(mon_mca_start(monmsg)) + monmsg->mca_offset + 8));
 }
 
 static int mon_check_mca(struct mon_msg *monmsg)
@@ -392,8 +392,7 @@ static ssize_t mon_read(struct file *filp, char __user *data,
        mce_start = mon_mca_start(monmsg) + monmsg->mca_offset;
        if ((monmsg->pos >= mce_start) && (monmsg->pos < mce_start + 12)) {
                count = min(count, (size_t) mce_start + 12 - monmsg->pos);
-               ret = copy_to_user(data, (void *) (unsigned long) monmsg->pos,
-                                  count);
+               ret = copy_to_user(data, __va(monmsg->pos), count);
                if (ret)
                        return -EFAULT;
                monmsg->pos += count;
@@ -406,8 +405,7 @@ static ssize_t mon_read(struct file *filp, char __user *data,
        if (monmsg->pos <= mon_rec_end(monmsg)) {
                count = min(count, (size_t) mon_rec_end(monmsg) - monmsg->pos
                                            + 1);
-               ret = copy_to_user(data, (void *) (unsigned long) monmsg->pos,
-                                  count);
+               ret = copy_to_user(data, __va(monmsg->pos), count);
                if (ret)
                        return -EFAULT;
                monmsg->pos += count;
index 34967e6..a108f2b 100644 (file)
@@ -49,8 +49,6 @@ int register_adapter_interrupt(struct airq_struct *airq)
                        return -ENOMEM;
                airq->flags |= AIRQ_PTR_ALLOCATED;
        }
-       if (!airq->lsi_mask)
-               airq->lsi_mask = 0xff;
        snprintf(dbf_txt, sizeof(dbf_txt), "rairq:%p", airq);
        CIO_TRACE_EVENT(4, dbf_txt);
        isc_register(airq->isc);
@@ -98,7 +96,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy)
        head = &airq_lists[tpi_info->isc];
        rcu_read_lock();
        hlist_for_each_entry_rcu(airq, head, list)
-               if ((*airq->lsi_ptr & airq->lsi_mask) != 0)
+               if (*airq->lsi_ptr != 0)
                        airq->handler(airq, tpi_info);
        rcu_read_unlock();
 
index 4b23c9f..ce04caa 100644 (file)
@@ -366,7 +366,6 @@ static int zcdn_create(const char *name)
 {
        dev_t devt;
        int i, rc = 0;
-       char nodename[ZCDN_MAX_NAME];
        struct zcdn_device *zcdndev;
 
        if (mutex_lock_interruptible(&ap_perms_mutex))
@@ -407,13 +406,11 @@ static int zcdn_create(const char *name)
        zcdndev->device.devt = devt;
        zcdndev->device.groups = zcdn_dev_attr_groups;
        if (name[0])
-               strncpy(nodename, name, sizeof(nodename));
+               rc = dev_set_name(&zcdndev->device, "%s", name);
        else
-               snprintf(nodename, sizeof(nodename),
-                        ZCRYPT_NAME "_%d", (int)MINOR(devt));
-       nodename[sizeof(nodename) - 1] = '\0';
-       if (dev_set_name(&zcdndev->device, nodename)) {
-               rc = -EINVAL;
+               rc = dev_set_name(&zcdndev->device, ZCRYPT_NAME "_%d", (int)MINOR(devt));
+       if (rc) {
+               kfree(zcdndev);
                goto unlockout;
        }
        rc = device_register(&zcdndev->device);
index 0292276..ac67576 100644 (file)
@@ -250,7 +250,6 @@ static struct airq_info *new_airq_info(int index)
        info->airq.handler = virtio_airq_handler;
        info->summary_indicator_idx = index;
        info->airq.lsi_ptr = get_summary_indicator(info);
-       info->airq.lsi_mask = 0xff;
        info->airq.isc = VIRTIO_AIRQ_ISC;
        rc = register_adapter_interrupt(&info->airq);
        if (rc) {
index bd5f39d..9472b97 100644 (file)
@@ -787,7 +787,7 @@ static int hisi_sas_init_device(struct domain_device *device)
                 * However we don't need to issue a hard reset here for these
                 * reasons:
                 * a. When probing the device, libsas/libata already issues a
-                * hard reset in sas_probe_sata() -> ata_sas_async_probe().
+                * hard reset in sas_probe_sata() -> ata_port_probe().
                 * Note that in hisi_sas_debug_I_T_nexus_reset() we take care
                 * to issue a hard reset by checking the dev status (== INIT).
                 * b. When resetting the controller, this is simply unnecessary.
index 3bf4547..12e2653 100644 (file)
@@ -567,8 +567,6 @@ static struct ata_port_operations sas_sata_ops = {
        .qc_prep                = ata_noop_qc_prep,
        .qc_issue               = sas_ata_qc_issue,
        .qc_fill_rtf            = sas_ata_qc_fill_rtf,
-       .port_start             = ata_sas_port_start,
-       .port_stop              = ata_sas_port_stop,
        .set_dmamode            = sas_ata_set_dmamode,
        .sched_eh               = sas_ata_sched_eh,
        .end_eh                 = sas_ata_end_eh,
@@ -609,9 +607,6 @@ int sas_ata_init(struct domain_device *found_dev)
        ap->private_data = found_dev;
        ap->cbl = ATA_CBL_SATA;
        ap->scsi_host = shost;
-       rc = ata_sas_port_init(ap);
-       if (rc)
-               goto destroy_port;
 
        rc = ata_sas_tport_add(ata_host->dev, ap);
        if (rc)
@@ -623,7 +618,7 @@ int sas_ata_init(struct domain_device *found_dev)
        return 0;
 
 destroy_port:
-       ata_sas_port_destroy(ap);
+       kfree(ap);
 free_host:
        ata_host_put(ata_host);
        return rc;
@@ -657,7 +652,7 @@ void sas_probe_sata(struct asd_sas_port *port)
                if (!dev_is_sata(dev))
                        continue;
 
-               ata_sas_async_probe(dev->sata_dev.ap);
+               ata_port_probe(dev->sata_dev.ap);
        }
        mutex_unlock(&port->ha->disco_mutex);
 
index 15cb996..ff7b63b 100644 (file)
@@ -301,7 +301,7 @@ void sas_free_device(struct kref *kref)
 
        if (dev_is_sata(dev) && dev->sata_dev.ap) {
                ata_sas_tport_delete(dev->sata_dev.ap);
-               ata_sas_port_destroy(dev->sata_dev.ap);
+               kfree(dev->sata_dev.ap);
                ata_host_put(dev->sata_dev.ata_host);
                dev->sata_dev.ata_host = NULL;
                dev->sata_dev.ap = NULL;
index 3f5b155..fddc633 100644 (file)
@@ -106,6 +106,7 @@ struct sun6i_spi {
        struct reset_control    *rstc;
 
        struct completion       done;
+       struct completion       dma_rx_done;
 
        const u8                *tx_buf;
        u8                      *rx_buf;
@@ -200,6 +201,13 @@ static size_t sun6i_spi_max_transfer_size(struct spi_device *spi)
        return SUN6I_MAX_XFER_SIZE - 1;
 }
 
+static void sun6i_spi_dma_rx_cb(void *param)
+{
+       struct sun6i_spi *sspi = param;
+
+       complete(&sspi->dma_rx_done);
+}
+
 static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
                                 struct spi_transfer *tfr)
 {
@@ -211,7 +219,7 @@ static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
                struct dma_slave_config rxconf = {
                        .direction = DMA_DEV_TO_MEM,
                        .src_addr = sspi->dma_addr_rx,
-                       .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+                       .src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
                        .src_maxburst = 8,
                };
 
@@ -224,6 +232,8 @@ static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
                                                 DMA_PREP_INTERRUPT);
                if (!rxdesc)
                        return -EINVAL;
+               rxdesc->callback_param = sspi;
+               rxdesc->callback = sun6i_spi_dma_rx_cb;
        }
 
        txdesc = NULL;
@@ -279,6 +289,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
                return -EINVAL;
 
        reinit_completion(&sspi->done);
+       reinit_completion(&sspi->dma_rx_done);
        sspi->tx_buf = tfr->tx_buf;
        sspi->rx_buf = tfr->rx_buf;
        sspi->len = tfr->len;
@@ -479,6 +490,22 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
        start = jiffies;
        timeout = wait_for_completion_timeout(&sspi->done,
                                              msecs_to_jiffies(tx_time));
+
+       if (!use_dma) {
+               sun6i_spi_drain_fifo(sspi);
+       } else {
+               if (timeout && rx_len) {
+                       /*
+                        * Even though RX on the peripheral side has finished
+                        * RX DMA might still be in flight
+                        */
+                       timeout = wait_for_completion_timeout(&sspi->dma_rx_done,
+                                                             timeout);
+                       if (!timeout)
+                               dev_warn(&master->dev, "RX DMA timeout\n");
+               }
+       }
+
        end = jiffies;
        if (!timeout) {
                dev_warn(&master->dev,
@@ -506,7 +533,6 @@ static irqreturn_t sun6i_spi_handler(int irq, void *dev_id)
        /* Transfer complete */
        if (status & SUN6I_INT_CTL_TC) {
                sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC);
-               sun6i_spi_drain_fifo(sspi);
                complete(&sspi->done);
                return IRQ_HANDLED;
        }
@@ -665,6 +691,7 @@ static int sun6i_spi_probe(struct platform_device *pdev)
        }
 
        init_completion(&sspi->done);
+       init_completion(&sspi->dma_rx_done);
 
        sspi->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
        if (IS_ERR(sspi->rstc)) {
index f569d37..57cc196 100644 (file)
@@ -266,7 +266,7 @@ static int gb_pwm_probe(struct gbphy_device *gbphy_dev,
 {
        struct gb_connection *connection;
        struct gb_pwm_chip *pwmc;
-       struct pwm_chip *pwm;
+       struct pwm_chip *chip;
        int ret;
 
        pwmc = kzalloc(sizeof(*pwmc), GFP_KERNEL);
@@ -294,13 +294,13 @@ static int gb_pwm_probe(struct gbphy_device *gbphy_dev,
        if (ret)
                goto exit_connection_disable;
 
-       pwm = &pwmc->chip;
+       chip = &pwmc->chip;
 
-       pwm->dev = &gbphy_dev->dev;
-       pwm->ops = &gb_pwm_ops;
-       pwm->npwm = pwmc->pwm_max + 1;
+       chip->dev = &gbphy_dev->dev;
+       chip->ops = &gb_pwm_ops;
+       chip->npwm = pwmc->pwm_max + 1;
 
-       ret = pwmchip_add(pwm);
+       ret = pwmchip_add(chip);
        if (ret) {
                dev_err(&gbphy_dev->dev,
                        "failed to register PWM: %d\n", ret);
index ff01f2c..6010135 100644 (file)
@@ -13,7 +13,9 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 /*
  * USB Control Register
index d3bea42..d28c30b 100644 (file)
@@ -87,8 +87,7 @@ static int gpio_backlight_probe(struct platform_device *pdev)
                /* Not booted with device tree or no phandle link to the node */
                bl->props.power = def_value ? FB_BLANK_UNBLANK
                                            : FB_BLANK_POWERDOWN;
-       else if (gpiod_get_direction(gbl->gpiod) == 0 &&
-                gpiod_get_value_cansleep(gbl->gpiod) == 0)
+       else if (gpiod_get_value_cansleep(gbl->gpiod) == 0)
                bl->props.power = FB_BLANK_POWERDOWN;
        else
                bl->props.power = FB_BLANK_UNBLANK;
index 3259292..032f8bd 100644 (file)
@@ -243,7 +243,7 @@ MODULE_DEVICE_TABLE(of, led_bl_of_match);
 static struct platform_driver led_bl_driver = {
        .driver         = {
                .name           = "led-backlight",
-               .of_match_table = of_match_ptr(led_bl_of_match),
+               .of_match_table = led_bl_of_match,
        },
        .probe          = led_bl_probe,
        .remove_new     = led_bl_remove,
index 1c9e921..da1f124 100644 (file)
@@ -71,6 +71,7 @@ struct lp855x {
        struct device *dev;
        struct lp855x_platform_data *pdata;
        struct pwm_device *pwm;
+       bool needs_pwm_init;
        struct regulator *supply;       /* regulator for VDD input */
        struct regulator *enable;       /* regulator for EN/VDDIO input */
 };
@@ -216,16 +217,24 @@ err:
        return ret;
 }
 
-static void lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
+static int lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
 {
        struct pwm_state state;
 
-       pwm_get_state(lp->pwm, &state);
+       if (lp->needs_pwm_init) {
+               pwm_init_state(lp->pwm, &state);
+               /* Legacy platform data compatibility */
+               if (lp->pdata->period_ns > 0)
+                       state.period = lp->pdata->period_ns;
+               lp->needs_pwm_init = false;
+       } else {
+               pwm_get_state(lp->pwm, &state);
+       }
 
        state.duty_cycle = div_u64(br * state.period, max_br);
        state.enabled = state.duty_cycle;
 
-       pwm_apply_state(lp->pwm, &state);
+       return pwm_apply_state(lp->pwm, &state);
 }
 
 static int lp855x_bl_update_status(struct backlight_device *bl)
@@ -237,11 +246,12 @@ static int lp855x_bl_update_status(struct backlight_device *bl)
                brightness = 0;
 
        if (lp->mode == PWM_BASED)
-               lp855x_pwm_ctrl(lp, brightness, bl->props.max_brightness);
+               return lp855x_pwm_ctrl(lp, brightness,
+                                     bl->props.max_brightness);
        else if (lp->mode == REGISTER_BASED)
-               lp855x_write_byte(lp, lp->cfg->reg_brightness, (u8)brightness);
-
-       return 0;
+               return lp855x_write_byte(lp, lp->cfg->reg_brightness,
+                                       (u8)brightness);
+       return -EINVAL;
 }
 
 static const struct backlight_ops lp855x_bl_ops = {
@@ -387,7 +397,6 @@ static int lp855x_probe(struct i2c_client *cl)
        const struct i2c_device_id *id = i2c_client_get_device_id(cl);
        const struct acpi_device_id *acpi_id = NULL;
        struct device *dev = &cl->dev;
-       struct pwm_state pwmstate;
        struct lp855x *lp;
        int ret;
 
@@ -470,15 +479,11 @@ static int lp855x_probe(struct i2c_client *cl)
                else
                        return dev_err_probe(dev, ret, "getting PWM\n");
 
+               lp->needs_pwm_init = false;
                lp->mode = REGISTER_BASED;
                dev_dbg(dev, "mode: register based\n");
        } else {
-               pwm_init_state(lp->pwm, &pwmstate);
-               /* Legacy platform data compatibility */
-               if (lp->pdata->period_ns > 0)
-                       pwmstate.period = lp->pdata->period_ns;
-               pwm_apply_state(lp->pwm, &pwmstate);
-
+               lp->needs_pwm_init = true;
                lp->mode = PWM_BASED;
                dev_dbg(dev, "mode: PWM based\n");
        }
index c6996aa..1012909 100644 (file)
@@ -9,8 +9,8 @@
 #include <linux/backlight.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_address.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 
 /* From DT binding */
index 0cbfb49..7514589 100644 (file)
@@ -307,7 +307,7 @@ config XILINX_WATCHDOG
 config XILINX_WINDOW_WATCHDOG
        tristate "Xilinx window watchdog timer"
        depends on HAS_IOMEM
-       depends on ARM64
+       depends on ARM64 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Window watchdog driver for the versal_wwdt IP core.
@@ -343,7 +343,7 @@ config RAVE_SP_WATCHDOG
 
 config MLX_WDT
        tristate "Mellanox Watchdog"
-       depends on MELLANOX_PLATFORM
+       depends on MELLANOX_PLATFORM || COMPILE_TEST
        select WATCHDOG_CORE
        select REGMAP
        help
@@ -493,7 +493,7 @@ config FTWDT010_WATCHDOG
 
 config IXP4XX_WATCHDOG
        tristate "IXP4xx Watchdog"
-       depends on ARCH_IXP4XX
+       depends on ARCH_IXP4XX || (ARM && COMPILE_TEST)
        select WATCHDOG_CORE
        help
          Say Y here if to include support for the watchdog timer
@@ -529,7 +529,7 @@ config S3C2410_WATCHDOG
 
 config SA1100_WATCHDOG
        tristate "SA1100/PXA2xx watchdog"
-       depends on ARCH_SA1100 || ARCH_PXA
+       depends on ARCH_SA1100 || ARCH_PXA || COMPILE_TEST
        help
          Watchdog timer embedded into SA11x0 and PXA2xx chips. This will
          reboot your system when timeout is reached.
@@ -720,7 +720,7 @@ config IMX2_WDT
 config IMX_SC_WDT
        tristate "IMX SC Watchdog"
        depends on HAVE_ARM_SMCCC
-       depends on IMX_SCU
+       depends on IMX_SCU || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the system controller watchdog
@@ -931,7 +931,7 @@ config ASPEED_WATCHDOG
 
 config STM32_WATCHDOG
        tristate "STM32 Independent WatchDoG (IWDG) support"
-       depends on ARCH_STM32
+       depends on ARCH_STM32 || COMPILE_TEST
        select WATCHDOG_CORE
        default y
        help
@@ -1065,7 +1065,7 @@ config ACQUIRE_WDT
 
 config ADVANTECH_WDT
        tristate "Advantech SBC Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          If you are configuring a Linux kernel for the Advantech single-board
          computer, say `Y' here to support its built-in watchdog timer
@@ -1074,14 +1074,16 @@ config ADVANTECH_WDT
 
 config ADVANTECH_EC_WDT
        tristate "Advantech Embedded Controller Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
+       select ISA_BUS_API
+       select WATCHDOG_CORE
        help
                This driver supports Advantech products with ITE based Embedded Controller.
                It does not support Advantech products with other ECs or without EC.
 
 config ALIM1535_WDT
        tristate "ALi M1535 PMU Watchdog Timer"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        help
          This is the driver for the hardware watchdog on the ALi M1535 PMU.
 
@@ -1105,7 +1107,7 @@ config ALIM7101_WDT
 
 config EBC_C384_WDT
        tristate "WinSystems EBC-C384 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select ISA_BUS_API
        select WATCHDOG_CORE
        help
@@ -1115,7 +1117,7 @@ config EBC_C384_WDT
 
 config EXAR_WDT
        tristate "Exar Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Enables watchdog timer support for the watchdog timer present
@@ -1126,7 +1128,7 @@ config EXAR_WDT
 
 config F71808E_WDT
        tristate "Fintek F718xx, F818xx Super I/O Watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog on the Fintek F71808E,
@@ -1138,7 +1140,7 @@ config F71808E_WDT
 
 config SP5100_TCO
        tristate "AMD/ATI SP5100 TCO Timer/Watchdog"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        select WATCHDOG_CORE
        help
          Hardware watchdog driver for the AMD/ATI SP5100 chipset. The TCO
@@ -1177,7 +1179,7 @@ config SC520_WDT
 
 config SBC_FITPC2_WATCHDOG
        tristate "Compulab SBC-FITPC2 watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the built-in watchdog timer on the fit-PC2,
          fit-PC2i, CM-iAM single-board computers made by Compulab.
@@ -1202,7 +1204,7 @@ config SBC_FITPC2_WATCHDOG
 
 config EUROTECH_WDT
        tristate "Eurotech CPU-1220/1410 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          Enable support for the watchdog timer on the Eurotech CPU-1220 and
          CPU-1410 cards.  These are PC/104 SBCs. Spec sheets and product
@@ -1210,7 +1212,7 @@ config EUROTECH_WDT
 
 config IB700_WDT
        tristate "IB700 SBC Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog on the IB700 Single
          Board Computer produced by TMC Technology (www.tmc-uk.com). This
@@ -1227,7 +1229,7 @@ config IB700_WDT
 
 config IBMASR
        tristate "IBM Automatic Server Restart"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the IBM Automatic Server Restart watchdog
          timer built-in into some eServer xSeries machines.
@@ -1237,7 +1239,7 @@ config IBMASR
 
 config WAFER_WDT
        tristate "ICP Single Board Computer Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is a driver for the hardware watchdog on the ICP Single
          Board Computer. This driver is working on (at least) the following
@@ -1259,7 +1261,7 @@ config I6300ESB_WDT
 
 config IE6XX_WDT
        tristate "Intel Atom E6xx Watchdog"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        select WATCHDOG_CORE
        select MFD_CORE
        select LPC_SCH
@@ -1319,7 +1321,7 @@ config ITCO_VENDOR_SUPPORT
 
 config IT8712F_WDT
        tristate "IT8712F (Smart Guardian) Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the built-in watchdog timer on the IT8712F
          Super I/0 chipset used on many motherboards.
@@ -1332,7 +1334,7 @@ config IT8712F_WDT
 
 config IT87_WDT
        tristate "IT87 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog on the ITE IT8607,
@@ -1350,7 +1352,7 @@ config IT87_WDT
 config HP_WATCHDOG
        tristate "HP ProLiant iLO2+ Hardware Watchdog Timer"
        select WATCHDOG_CORE
-       depends on (ARM64 || X86) && PCI
+       depends on (ARM64 || X86 || COMPILE_TEST) && PCI
        help
          A software monitoring watchdog and NMI handling driver. This driver
          will detect lockups and provide a stack trace. This is a driver that
@@ -1380,7 +1382,7 @@ config KEMPLD_WDT
 
 config SC1200_WDT
        tristate "National Semiconductor PC87307/PC97307 (ala SC1200) Watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is a driver for National Semiconductor PC87307/PC97307 hardware
          watchdog cards as found on the SC1200. This watchdog is mainly used
@@ -1403,7 +1405,7 @@ config SCx200_WDT
 
 config PC87413_WDT
        tristate "NS PC87413 watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog on the PC87413 chipset
          This watchdog simply watches your kernel to make sure it doesn't
@@ -1417,7 +1419,7 @@ config PC87413_WDT
 
 config NV_TCO
        tristate "nVidia TCO Timer/Watchdog"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        help
          Hardware driver for the TCO timer built into the nVidia Hub family
          (such as the MCP51).  The TCO (Total Cost of Ownership) timer is a
@@ -1446,7 +1448,7 @@ config RDC321X_WDT
 
 config 60XX_WDT
        tristate "SBC-60XX Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This driver can be used with the watchdog timer found on some
          single board computers, namely the 6010 PII based computer.
@@ -1486,7 +1488,7 @@ config SBC7240_WDT
 
 config CPU5_WDT
        tristate "SMA CPU5 Watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          TBD.
          To compile this driver as a module, choose M here: the
@@ -1494,7 +1496,7 @@ config CPU5_WDT
 
 config SMSC_SCH311X_WDT
        tristate "SMSC SCH311X Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog timer on the
          SMSC SCH3112, SCH3114 and SCH3116 Super IO chipset
@@ -1506,7 +1508,7 @@ config SMSC_SCH311X_WDT
 
 config SMSC37B787_WDT
        tristate "Winbond SMsC37B787 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog component on the
          Winbond SMsC37B787 chipset as used on the NetRunner Mainboard
@@ -1526,7 +1528,7 @@ config SMSC37B787_WDT
 
 config TQMX86_WDT
        tristate "TQ-Systems TQMX86 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog timer in the TQMX86 IO
@@ -1539,7 +1541,7 @@ config TQMX86_WDT
 
 config VIA_WDT
        tristate "VIA Watchdog Timer"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog timer on VIA
@@ -1552,7 +1554,7 @@ config VIA_WDT
 
 config W83627HF_WDT
        tristate "Watchdog timer for W83627HF/W83627DHG and compatibles"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog on the following
@@ -1582,7 +1584,7 @@ config W83627HF_WDT
 
 config W83877F_WDT
        tristate "W83877F (EMACS) Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog on the W83877F chipset
          as used in EMACS PC-104 motherboards (and likely others).  This
@@ -1597,7 +1599,7 @@ config W83877F_WDT
 
 config W83977F_WDT
        tristate "W83977F (PCM-5335) Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog on the W83977F I/O chip
          as used in AAEON's PCM-5335 SBC (and likely others).  This
@@ -1610,7 +1612,7 @@ config W83977F_WDT
 
 config MACHZ_WDT
        tristate "ZF MachZ Watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          If you are using a ZF Micro MachZ processor, say Y here, otherwise
          N.  This is the driver for the watchdog timer built-in on that
@@ -1623,7 +1625,7 @@ config MACHZ_WDT
 
 config SBC_EPX_C3_WATCHDOG
        tristate "Winsystems SBC EPX-C3 watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the built-in watchdog timer on the EPX-C3
          Single-board computer made by Winsystems, Inc.
@@ -1739,7 +1741,7 @@ config INDYDOG
 
 config JZ4740_WDT
        tristate "Ingenic jz4740 SoC hardware watchdog"
-       depends on MIPS
+       depends on MIPS || COMPILE_TEST
        depends on COMMON_CLK
        select WATCHDOG_CORE
        select MFD_SYSCON
@@ -1798,6 +1800,19 @@ config OCTEON_WDT
          from the first interrupt, it is then only poked when the
          device is written.
 
+config MARVELL_GTI_WDT
+       tristate "Marvell GTI Watchdog driver"
+       depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
+       default y
+       select WATCHDOG_CORE
+       help
+         Marvell GTI hardware supports watchdog timer. First timeout
+         works as watchdog pretimeout and installed interrupt handler
+         will be called on first timeout. Hardware can generate interrupt
+         to SCP on second timeout but it is not enabled, so second
+         timeout is ignored. If device poke does not happen then system
+         will reboot on third timeout.
+
 config BCM2835_WDT
        tristate "Broadcom BCM2835 hardware watchdog"
        depends on ARCH_BCM2835 || (OF && COMPILE_TEST)
@@ -1823,7 +1838,7 @@ config BCM_KONA_WDT
 
 config BCM_KONA_WDT_DEBUG
        bool "DEBUGFS support for BCM Kona Watchdog"
-       depends on BCM_KONA_WDT
+       depends on BCM_KONA_WDT || COMPILE_TEST
        help
          If enabled, adds /sys/kernel/debug/bcm_kona_wdt/info which provides
          access to the driver's internal data structures as well as watchdog
@@ -1864,7 +1879,7 @@ config LANTIQ_WDT
 
 config LOONGSON1_WDT
        tristate "Loongson1 SoC hardware watchdog"
-       depends on MACH_LOONGSON32
+       depends on MACH_LOONGSON32 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Hardware driver for the Loongson1 SoC Watchdog Timer.
@@ -1878,7 +1893,7 @@ config RALINK_WDT
 
 config GXP_WATCHDOG
        tristate "HPE GXP watchdog support"
-       depends on ARCH_HPE_GXP
+       depends on ARCH_HPE_GXP || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer
index 3633f5b..7eab9de 100644 (file)
@@ -98,6 +98,7 @@ obj-$(CONFIG_VISCONTI_WATCHDOG) += visconti_wdt.o
 obj-$(CONFIG_MSC313E_WATCHDOG) += msc313e_wdt.o
 obj-$(CONFIG_APPLE_WATCHDOG) += apple_wdt.o
 obj-$(CONFIG_SUNPLUS_WATCHDOG) += sunplus_wdt.o
+obj-$(CONFIG_MARVELL_GTI_WDT) += marvell_gti_wdt.o
 
 # X86 (i386 + ia64 + x86_64) Architecture
 obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o
index e586529..8133a5d 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/types.h>
index d20ec27..558015f 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/atmel-st.h>
 #include <linux/miscdevice.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/platform_device.h>
@@ -26,8 +27,6 @@
 #include <linux/types.h>
 #include <linux/watchdog.h>
 #include <linux/uaccess.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 
 #define WDT_DEFAULT_TIME       5       /* seconds */
 #define WDT_MAX_TIME           256     /* seconds */
index 47250f9..901b94d 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/uaccess.h>
 
 #include <asm/irq.h>
index 442c5bf..28f5af7 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/watchdog.h>
@@ -221,20 +221,18 @@ static const struct dev_pm_ops ftwdt010_wdt_dev_pm_ops = {
                                ftwdt010_wdt_resume)
 };
 
-#ifdef CONFIG_OF
 static const struct of_device_id ftwdt010_wdt_match[] = {
        { .compatible = "faraday,ftwdt010" },
        { .compatible = "cortina,gemini-watchdog" },
        {},
 };
 MODULE_DEVICE_TABLE(of, ftwdt010_wdt_match);
-#endif
 
 static struct platform_driver ftwdt010_wdt_driver = {
        .probe          = ftwdt010_wdt_probe,
        .driver         = {
                .name   = "ftwdt010-wdt",
-               .of_match_table = of_match_ptr(ftwdt010_wdt_match),
+               .of_match_table = ftwdt010_wdt_match,
                .pm = &ftwdt010_wdt_dev_pm_ops,
        },
 };
index 97afc90..6a1db1c 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/fs.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
 
index 6fcc359..42e8ffa 100644 (file)
@@ -26,8 +26,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/watchdog.h>
@@ -375,7 +374,7 @@ static void imx2_wdt_shutdown(struct platform_device *pdev)
                 */
                imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
                imx2_wdt_ping(wdog);
-               dev_crit(&pdev->dev, "Device shutdown: Expect reboot!\n");
+               dev_crit(&pdev->dev, "Device shutdown.\n");
        }
 }
 
index 7ca4867..c703586 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/watchdog.h>
index 9b2173f..fb7fae7 100644 (file)
@@ -203,3 +203,4 @@ module_platform_driver(mid_wdt_driver);
 MODULE_AUTHOR("David Cohen <david.a.cohen@linux.intel.com>");
 MODULE_DESCRIPTION("Watchdog Driver for Intel MID platform");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:intel_mid_wdt");
index 6fab504..a273b97 100644 (file)
@@ -9,7 +9,8 @@
 #include <linux/module.h>
 #include <linux/bitops.h>
 #include <linux/watchdog.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/uaccess.h>
 #include <linux/clk.h>
 #include <linux/io.h>
index 4ac7810..0587ff4 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
diff --git a/drivers/watchdog/marvell_gti_wdt.c b/drivers/watchdog/marvell_gti_wdt.c
new file mode 100644 (file)
index 0000000..d7eb828
--- /dev/null
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell GTI Watchdog driver
+ *
+ * Copyright (C) 2023 Marvell.
+ */
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+/*
+ * Hardware supports following mode of operation:
+ * 1) Interrupt Only:
+ *    This will generate the interrupt to arm core whenever timeout happens.
+ *
+ * 2) Interrupt + del3t (Interrupt to firmware (SCP processor)).
+ *    This will generate interrupt to arm core on 1st timeout happens
+ *    This will generate interrupt to SCP processor on 2nd timeout happens
+ *
+ * 3) Interrupt + Interrupt to SCP processor (called del3t) + reboot.
+ *    This will generate interrupt to arm core on 1st timeout happens
+ *    Will generate interrupt to SCP processor on 2nd timeout happens,
+ *    if interrupt is configured.
+ *    Reboot on 3rd timeout.
+ *
+ * Driver will use hardware in mode-3 above so that the system can reboot in
+ * case of a hardware hang. Also h/w is configured not to generate SCP
+ * interrupt, so effectively the 2nd timeout is ignored within hardware.
+ *
+ * First timeout is effectively watchdog pretimeout.
+ */
+
+/* GTI CWD Watchdog (GTI_CWD_WDOG) Register */
+#define GTI_CWD_WDOG(reg_offset)       (0x8 * (reg_offset))
+#define GTI_CWD_WDOG_MODE_INT_DEL3T_RST        0x3
+#define GTI_CWD_WDOG_MODE_MASK         GENMASK_ULL(1, 0)
+#define GTI_CWD_WDOG_LEN_SHIFT         4
+#define GTI_CWD_WDOG_LEN_MASK          GENMASK_ULL(19, 4)
+#define GTI_CWD_WDOG_CNT_SHIFT         20
+#define GTI_CWD_WDOG_CNT_MASK          GENMASK_ULL(43, 20)
+
+/* GTI CWD Watchdog Interrupt (GTI_CWD_INT) Register */
+#define GTI_CWD_INT                    0x200
+#define GTI_CWD_INT_PENDING_STATUS(bit)        BIT_ULL(bit)
+
+/* GTI CWD Watchdog Interrupt Enable Clear (GTI_CWD_INT_ENA_CLR) Register */
+#define GTI_CWD_INT_ENA_CLR            0x210
+#define GTI_CWD_INT_ENA_CLR_VAL(bit)   BIT_ULL(bit)
+
+/* GTI CWD Watchdog Interrupt Enable Set (GTI_CWD_INT_ENA_SET) Register */
+#define GTI_CWD_INT_ENA_SET            0x218
+#define GTI_CWD_INT_ENA_SET_VAL(bit)   BIT_ULL(bit)
+
+/* GTI CWD Watchdog Poke (GTI_CWD_POKE) Registers */
+#define GTI_CWD_POKE(reg_offset)       (0x10000 + 0x8 * (reg_offset))
+#define GTI_CWD_POKE_VAL               1
+
+/* Per-compatible parameters, attached to the OF match table entries. */
+struct gti_match_data {
+       /* Number of GTI timers; probe accepts timer indices below this value */
+       u32 gti_num_timers;
+};
+
+/* Match data used for the "marvell,cn9670-wdt" compatible. */
+static const struct gti_match_data match_data_octeontx2 = {
+       .gti_num_timers = 54,
+};
+
+/* Match data used for the "marvell,cn10624-wdt" compatible. */
+static const struct gti_match_data match_data_cn10k = {
+       .gti_num_timers = 64,
+};
+
+/* Driver state for one GTI watchdog instance, allocated in probe. */
+struct gti_wdt_priv {
+       /* Watchdog device registered with the watchdog core */
+       struct watchdog_device wdev;
+       /* Mapped GTI register base (platform resource 0) */
+       void __iomem *base;
+       /* Rate of sclk as returned by clk_get_rate() */
+       u32 clock_freq;
+       /* GTI clock obtained in gti_wdt_get_cntfrq() */
+       struct clk *sclk;
+       /* wdt_timer_idx used for timer to be used for system watchdog */
+       u32 wdt_timer_idx;
+       /* Match data of the matched compatible (number of timers) */
+       const struct gti_match_data *data;
+};
+
+/*
+ * Watchdog interrupt handler: acknowledge the pending status of the
+ * watchdog timer and report a pretimeout event to the watchdog core.
+ * @data is the watchdog_device passed to devm_request_irq() in probe.
+ */
+static irqreturn_t gti_wdt_interrupt(int irq, void *data)
+{
+       struct watchdog_device *wdd = data;
+       struct gti_wdt_priv *gti = watchdog_get_drvdata(wdd);
+       u64 pending = GTI_CWD_INT_PENDING_STATUS(gti->wdt_timer_idx);
+
+       /* Clear the pending status of our timer before notifying the core */
+       writeq(pending, gti->base + GTI_CWD_INT);
+       watchdog_notify_pretimeout(wdd);
+
+       return IRQ_HANDLED;
+}
+
+/* Poke the watchdog timer through its GTI_CWD_POKE register. Returns 0. */
+static int gti_wdt_ping(struct watchdog_device *wdev)
+{
+       struct gti_wdt_priv *gti = watchdog_get_drvdata(wdev);
+       void __iomem *poke_reg;
+
+       poke_reg = gti->base + GTI_CWD_POKE(gti->wdt_timer_idx);
+       writeq(GTI_CWD_POKE_VAL, poke_reg);
+
+       return 0;
+}
+
+/*
+ * Start the watchdog: mark the hardware as running, clear and enable the
+ * timer's interrupt, then select the interrupt + DEL3T + reset mode.
+ *
+ * Returns 0 on success or -EINVAL when no pretimeout is configured.
+ */
+static int gti_wdt_start(struct watchdog_device *wdev)
+{
+       struct gti_wdt_priv *gti = watchdog_get_drvdata(wdev);
+       void __iomem *wdog_reg;
+       u64 mode;
+
+       if (wdev->pretimeout == 0)
+               return -EINVAL;
+
+       set_bit(WDOG_HW_RUNNING, &wdev->status);
+
+       /* Drop any stale pending interrupt before enabling it */
+       writeq(GTI_CWD_INT_PENDING_STATUS(gti->wdt_timer_idx),
+              gti->base + GTI_CWD_INT);
+       writeq(GTI_CWD_INT_ENA_SET_VAL(gti->wdt_timer_idx),
+              gti->base + GTI_CWD_INT_ENA_SET);
+
+       /* Select (Interrupt + SCP interrupt (DEL3T) + core domain reset) */
+       wdog_reg = gti->base + GTI_CWD_WDOG(gti->wdt_timer_idx);
+       mode = readq(wdog_reg) | GTI_CWD_WDOG_MODE_INT_DEL3T_RST;
+       writeq(mode, wdog_reg);
+
+       return 0;
+}
+
+/*
+ * Stop the watchdog: disable the timer's interrupt and clear the MODE
+ * field of its GTI_CWD_WDOG register. Returns 0.
+ */
+static int gti_wdt_stop(struct watchdog_device *wdev)
+{
+       struct gti_wdt_priv *gti = watchdog_get_drvdata(wdev);
+       void __iomem *wdog_reg;
+       u64 mode;
+
+       /* Mask the interrupt first */
+       writeq(GTI_CWD_INT_ENA_CLR_VAL(gti->wdt_timer_idx),
+              gti->base + GTI_CWD_INT_ENA_CLR);
+
+       /* GTI_CWD_WDOG.Mode = 0 stops the timer */
+       wdog_reg = gti->base + GTI_CWD_WDOG(gti->wdt_timer_idx);
+       mode = readq(wdog_reg) & ~GTI_CWD_WDOG_MODE_MASK;
+       writeq(mode, wdog_reg);
+
+       return 0;
+}
+
+/*
+ * Program a new timeout. The pretimeout is kept at one third of @timeout
+ * and it is the pretimeout that is written to the hardware. Returns 0.
+ */
+static int gti_wdt_settimeout(struct watchdog_device *wdev,
+                                       unsigned int timeout)
+{
+       struct gti_wdt_priv *gti = watchdog_get_drvdata(wdev);
+       void __iomem *wdog_reg;
+       u64 len, regval;
+
+       /* Record the new timeout; the pretimeout is always 1/3 of it */
+       wdev->timeout = timeout;
+       wdev->pretimeout = timeout / 3;
+
+       /* Pretimeout in clock cycles, then in 1024-cycle watchdog steps */
+       len = ((u64)gti->clock_freq * wdev->pretimeout) >> 10;
+
+       /* Round up to units of 256 steps and clamp to the 16-bit LEN field */
+       len = (len + 0xff) >> 8;
+       if (len > 0xffff)
+               len = 0xffff;
+
+       /*
+        * Keep only the MODE bits, then load the 16-bit GTI_CWD_WDOG.LEN
+        * field and the upper 16 bits of the 24-bit GTI_CWD_WDOG.CNT field
+        * with the same value; the lower 8 bits of CNT stay zero.
+        */
+       wdog_reg = gti->base + GTI_CWD_WDOG(gti->wdt_timer_idx);
+       regval = readq(wdog_reg) & GTI_CWD_WDOG_MODE_MASK;
+       regval |= (len << (GTI_CWD_WDOG_CNT_SHIFT + 8)) |
+                 (len << GTI_CWD_WDOG_LEN_SHIFT);
+       writeq(regval, wdog_reg);
+
+       return 0;
+}
+
+/*
+ * Set the pretimeout. The timeout is reprogrammed to three times @timeout
+ * through gti_wdt_settimeout(), which derives the pretimeout from it.
+ *
+ * Returns 0 on success or -EINVAL when three times @timeout would exceed
+ * max_timeout.
+ */
+static int gti_wdt_set_pretimeout(struct watchdog_device *wdev,
+                                       unsigned int timeout)
+{
+       /*
+        * Compare against max_timeout / 3 instead of computing timeout * 3
+        * first: the multiplication can wrap around for a large @timeout and
+        * let an out-of-range value pass the check.
+        */
+       if (timeout > wdev->max_timeout / 3)
+               return -EINVAL;
+
+       return gti_wdt_settimeout(wdev, timeout * 3);
+}
+
+/*
+ * Acquire and enable the GTI clock and cache its rate in priv->clock_freq.
+ *
+ * Returns 0 on success, the devm_clk_get_enabled() error code if the clock
+ * cannot be obtained, or -EINVAL if the clock reports a rate of zero.
+ */
+static int gti_wdt_get_cntfrq(struct platform_device *pdev,
+                             struct gti_wdt_priv *priv)
+{
+       /*
+        * devm_clk_get_enabled() already disables and unprepares the clock
+        * when the device is unbound, so no additional devm cleanup action
+        * is registered here; a second one would disable the clock twice.
+        */
+       priv->sclk = devm_clk_get_enabled(&pdev->dev, NULL);
+       if (IS_ERR(priv->sclk))
+               return PTR_ERR(priv->sclk);
+
+       priv->clock_freq = clk_get_rate(priv->sclk);
+       if (!priv->clock_freq)
+               return -EINVAL;
+
+       return 0;
+}
+
+/* Identity string and WDIOF_* option flags assigned to wdev.info in probe. */
+static const struct watchdog_info gti_wdt_ident = {
+       .identity = "Marvell GTI watchdog",
+       .options = WDIOF_SETTIMEOUT | WDIOF_PRETIMEOUT | WDIOF_KEEPALIVEPING |
+                  WDIOF_MAGICCLOSE | WDIOF_CARDRESET,
+};
+
+/* Watchdog operations implemented by this driver, assigned in probe. */
+static const struct watchdog_ops gti_wdt_ops = {
+       .owner = THIS_MODULE,
+       .start = gti_wdt_start,
+       .stop = gti_wdt_stop,
+       .ping = gti_wdt_ping,
+       .set_timeout = gti_wdt_settimeout,
+       .set_pretimeout = gti_wdt_set_pretimeout,
+};
+
+/*
+ * Probe a GTI watchdog: map the registers, read the clock rate, select the
+ * timer to use, program the maximum timeout as default, then register the
+ * watchdog device and the pretimeout interrupt handler.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int gti_wdt_probe(struct platform_device *pdev)
+{
+       struct gti_wdt_priv *priv;
+       struct device *dev = &pdev->dev;
+       struct watchdog_device *wdog_dev;
+       u64 max_pretimeout;
+       u32 wdt_idx;
+       int irq;
+       int err;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(priv->base))
+               return dev_err_probe(dev, PTR_ERR(priv->base),
+                                    "reg property not valid/found\n");
+
+       err = gti_wdt_get_cntfrq(pdev, priv);
+       if (err)
+               return dev_err_probe(dev, err,
+                                    "GTI clock frequency not valid/found");
+
+       /* Without match data the number of timers is unknown */
+       priv->data = of_device_get_match_data(dev);
+       if (!priv->data)
+               return dev_err_probe(dev, -ENODEV, "No match data found\n");
+
+       /* By default, use the last timer for the watchdog */
+       priv->wdt_timer_idx = priv->data->gti_num_timers - 1;
+
+       /* An optional DT property overrides the default timer index */
+       err = of_property_read_u32(dev->of_node, "marvell,wdt-timer-index",
+                                  &wdt_idx);
+       if (!err) {
+               /*
+                * err is 0 on this path, so an explicit error code is
+                * required; returning err would make probe report success.
+                */
+               if (wdt_idx >= priv->data->gti_num_timers)
+                       return dev_err_probe(dev, -EINVAL,
+                               "GTI wdog timer index not valid");
+
+               priv->wdt_timer_idx = wdt_idx;
+       }
+
+       wdog_dev = &priv->wdev;
+       wdog_dev->info = &gti_wdt_ident;
+       wdog_dev->ops = &gti_wdt_ops;
+       wdog_dev->parent = dev;
+       /*
+        * Watchdog counter is 24 bit where lower 8 bits are zeros
+        * This counter decrements every 1024 clock cycles.
+        */
+       max_pretimeout = (GTI_CWD_WDOG_CNT_MASK >> GTI_CWD_WDOG_CNT_SHIFT);
+       max_pretimeout &= ~0xFFUL;
+       max_pretimeout = (max_pretimeout * 1024) / priv->clock_freq;
+       wdog_dev->pretimeout = max_pretimeout;
+
+       /* Maximum timeout is 3 times the pretimeout */
+       wdog_dev->max_timeout = max_pretimeout * 3;
+       /* Minimum first timeout (pretimeout) is 1, so min_timeout as 3 */
+       wdog_dev->min_timeout = 3;
+       wdog_dev->timeout = wdog_dev->pretimeout;
+
+       watchdog_set_drvdata(wdog_dev, priv);
+       platform_set_drvdata(pdev, priv);
+       /* Program the default timeout into the hardware before registering */
+       gti_wdt_settimeout(wdog_dev, wdog_dev->timeout);
+       watchdog_stop_on_reboot(wdog_dev);
+       watchdog_stop_on_unregister(wdog_dev);
+
+       err = devm_watchdog_register_device(dev, wdog_dev);
+       if (err)
+               return err;
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0)
+               return dev_err_probe(dev, irq, "IRQ resource not found\n");
+
+       /* The handler receives the watchdog_device as its cookie */
+       err = devm_request_irq(dev, irq, gti_wdt_interrupt, 0,
+                              pdev->name, &priv->wdev);
+       if (err)
+               return dev_err_probe(dev, err, "Failed to register interrupt handler\n");
+
+       dev_info(dev, "Watchdog enabled (timeout=%u sec)\n", wdog_dev->timeout);
+       return 0;
+}
+
+/*
+ * Supported compatibles; .data points to the gti_match_data that probe
+ * reads through of_device_get_match_data().
+ */
+static const struct of_device_id gti_wdt_of_match[] = {
+       { .compatible = "marvell,cn9670-wdt", .data = &match_data_octeontx2},
+       { .compatible = "marvell,cn10624-wdt", .data = &match_data_cn10k},
+       { },
+};
+MODULE_DEVICE_TABLE(of, gti_wdt_of_match);
+
+/* Platform driver definition; devices are matched via gti_wdt_of_match. */
+static struct platform_driver gti_wdt_driver = {
+       .driver = {
+               .name = "gti-wdt",
+               .of_match_table = gti_wdt_of_match,
+       },
+       .probe = gti_wdt_probe,
+};
+module_platform_driver(gti_wdt_driver);
+
+MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
+MODULE_DESCRIPTION("Marvell GTI watchdog driver");
+MODULE_LICENSE("GPL");
index 3c98030..c7de302 100644 (file)
@@ -153,7 +153,6 @@ MODULE_DEVICE_TABLE(mcb, men_z069_ids);
 static struct mcb_driver men_z069_driver = {
        .driver = {
                .name = "z069-wdt",
-               .owner = THIS_MODULE,
        },
        .probe = men_z069_probe,
        .remove = men_z069_remove,
index 35d80cb..a48622d 100644 (file)
@@ -22,7 +22,6 @@
 
 #define GXBB_WDT_CTRL_CLKDIV_EN                        BIT(25)
 #define GXBB_WDT_CTRL_CLK_EN                   BIT(24)
-#define GXBB_WDT_CTRL_EE_RESET                 BIT(21)
 #define GXBB_WDT_CTRL_EN                       BIT(18)
 #define GXBB_WDT_CTRL_DIV_MASK                 (BIT(18) - 1)
 
@@ -45,6 +44,10 @@ struct meson_gxbb_wdt {
        struct clk *clk;
 };
 
+struct wdt_params {
+       u32 rst;
+};
+
 static int meson_gxbb_wdt_start(struct watchdog_device *wdt_dev)
 {
        struct meson_gxbb_wdt *data = watchdog_get_drvdata(wdt_dev);
@@ -140,8 +143,17 @@ static const struct dev_pm_ops meson_gxbb_wdt_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(meson_gxbb_wdt_suspend, meson_gxbb_wdt_resume)
 };
 
+static const struct wdt_params gxbb_params = {
+       .rst = BIT(21),
+};
+
+static const struct wdt_params t7_params = {
+       .rst = BIT(22),
+};
+
 static const struct of_device_id meson_gxbb_wdt_dt_ids[] = {
-        { .compatible = "amlogic,meson-gxbb-wdt", },
+        { .compatible = "amlogic,meson-gxbb-wdt", .data = &gxbb_params, },
+        { .compatible = "amlogic,t7-wdt", .data = &t7_params, },
         { /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, meson_gxbb_wdt_dt_ids);
@@ -150,6 +162,7 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct meson_gxbb_wdt *data;
+       struct wdt_params *params;
        u32 ctrl_reg;
 
        data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
@@ -164,6 +177,8 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(data->clk))
                return PTR_ERR(data->clk);
 
+       params = (struct wdt_params *)of_device_get_match_data(dev);
+
        platform_set_drvdata(pdev, data);
 
        data->wdt_dev.parent = dev;
@@ -191,7 +206,7 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
        /* Setup with 1ms timebase */
        ctrl_reg |= ((clk_get_rate(data->clk) / 1000) &
                        GXBB_WDT_CTRL_DIV_MASK) |
-                       GXBB_WDT_CTRL_EE_RESET |
+                       params->rst |
                        GXBB_WDT_CTRL_CLK_EN |
                        GXBB_WDT_CTRL_CLKDIV_EN;
 
index 539feaa..497496f 100644 (file)
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
 
index 1c569be..867f9f3 100644 (file)
@@ -16,8 +16,8 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/watchdog.h>
 #include <linux/io.h>
index a9c4375..b2330b1 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/types.h>
index 2a079ca..05657dc 100644 (file)
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/ioport.h>
 #include <linux/watchdog.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
 
 /* Register offsets for the Wdt device */
 #define XWT_TWCSR0_OFFSET   0x0 /* Control/Status Register0 */
index bc4ccdd..ab06824 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/watchdog.h>
index 6d1a002..1d282de 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/watchdog.h>
index a98abd0..782b8c2 100644 (file)
@@ -23,8 +23,8 @@
 #include <linux/bitops.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 
 #define DRV_NAME "PIKA-WDT"
 
index f4bfbff..f3fcbeb 100644 (file)
@@ -266,7 +266,7 @@ static struct platform_driver pm8916_wdt_driver = {
        .probe = pm8916_wdt_probe,
        .driver = {
                .name = "pm8916-wdt",
-               .of_match_table = of_match_ptr(pm8916_wdt_id_table),
+               .of_match_table = pm8916_wdt_id_table,
                .pm = &pm8916_wdt_pm_ops,
        },
 };
index d776474..9e790f0 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
-#include <linux/of_device.h>
 
 enum wdt_reg {
        WDT_RST,
index 2c95615..5d1c217 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/mfd/rave-sp.h>
 #include <linux/module.h>
 #include <linux/nvmem-consumer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/slab.h>
index c04b383..b293792 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/miscdevice.h>
 #include <linux/watchdog.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
index ce8f18e..8e1be7b 100644 (file)
@@ -14,6 +14,8 @@
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/types.h>
 
 #define DWDST                  BIT(1)
 
+#define PON_REASON_SOF_NUM     0xBBBBCCCC
+#define PON_REASON_MAGIC_NUM   0xDDDDDDDD
+#define PON_REASON_EOF_NUM     0xCCCCBBBB
+#define RESERVED_MEM_MIN_SIZE  12
+
 static int heartbeat = DEFAULT_HEARTBEAT;
 
 /*
@@ -198,6 +205,11 @@ static int rti_wdt_probe(struct platform_device *pdev)
        struct rti_wdt_device *wdt;
        struct clk *clk;
        u32 last_ping = 0;
+       struct device_node *node;
+       u32 reserved_mem_size;
+       struct resource res;
+       u32 *vaddr;
+       u64 paddr;
 
        wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
        if (!wdt)
@@ -284,6 +296,42 @@ static int rti_wdt_probe(struct platform_device *pdev)
                }
        }
 
+       node = of_parse_phandle(pdev->dev.of_node, "memory-region", 0);
+       if (node) {
+               ret = of_address_to_resource(node, 0, &res);
+               if (ret) {
+                       dev_err(dev, "No memory address assigned to the region.\n");
+                       goto err_iomap;
+               }
+
+               /*
+                * If reserved memory is defined for the watchdog reset cause,
+                * read out the power-on (PON) reason and pass it to bootstatus.
+                */
+               paddr = res.start;
+               reserved_mem_size = resource_size(&res);
+               if (reserved_mem_size < RESERVED_MEM_MIN_SIZE) {
+                       dev_err(dev, "The size of reserved memory is too small.\n");
+                       ret = -EINVAL;
+                       goto err_iomap;
+               }
+
+               vaddr = memremap(paddr, reserved_mem_size, MEMREMAP_WB);
+               if (!vaddr) {
+                       dev_err(dev, "Failed to map memory-region.\n");
+                       ret = -ENOMEM;
+                       goto err_iomap;
+               }
+
+               if (vaddr[0] == PON_REASON_SOF_NUM &&
+                   vaddr[1] == PON_REASON_MAGIC_NUM &&
+                   vaddr[2] == PON_REASON_EOF_NUM) {
+                       wdd->bootstatus |= WDIOF_CARDRESET;
+               }
+               memset(vaddr, 0, reserved_mem_size);
+               memunmap(vaddr);
+       }
+
        watchdog_init_timeout(wdd, heartbeat, dev);
 
        ret = watchdog_register_device(wdd);
index fe6c2ed..cb4901b 100644 (file)
@@ -9,9 +9,9 @@
 #include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
index d404953..1741f98 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
index 95416a9..0b4bd88 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <linux/delay.h>
@@ -379,10 +378,11 @@ static int s3c2410wdt_enable(struct s3c2410_wdt *wdt, bool en)
 static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
 {
        struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+       unsigned long flags;
 
-       spin_lock(&wdt->lock);
+       spin_lock_irqsave(&wdt->lock, flags);
        writel(wdt->count, wdt->reg_base + S3C2410_WTCNT);
-       spin_unlock(&wdt->lock);
+       spin_unlock_irqrestore(&wdt->lock, flags);
 
        return 0;
 }
@@ -399,10 +399,11 @@ static void __s3c2410wdt_stop(struct s3c2410_wdt *wdt)
 static int s3c2410wdt_stop(struct watchdog_device *wdd)
 {
        struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+       unsigned long flags;
 
-       spin_lock(&wdt->lock);
+       spin_lock_irqsave(&wdt->lock, flags);
        __s3c2410wdt_stop(wdt);
-       spin_unlock(&wdt->lock);
+       spin_unlock_irqrestore(&wdt->lock, flags);
 
        return 0;
 }
@@ -411,8 +412,9 @@ static int s3c2410wdt_start(struct watchdog_device *wdd)
 {
        unsigned long wtcon;
        struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+       unsigned long flags;
 
-       spin_lock(&wdt->lock);
+       spin_lock_irqsave(&wdt->lock, flags);
 
        __s3c2410wdt_stop(wdt);
 
@@ -433,7 +435,7 @@ static int s3c2410wdt_start(struct watchdog_device *wdd)
        writel(wdt->count, wdt->reg_base + S3C2410_WTDAT);
        writel(wdt->count, wdt->reg_base + S3C2410_WTCNT);
        writel(wtcon, wdt->reg_base + S3C2410_WTCON);
-       spin_unlock(&wdt->lock);
+       spin_unlock_irqrestore(&wdt->lock, flags);
 
        return 0;
 }
index aeee934..13e7291 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
@@ -255,6 +254,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
        struct sama5d4_wdt *wdt;
        void __iomem *regs;
        u32 irq = 0;
+       u32 reg;
        int ret;
 
        wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
@@ -305,6 +305,12 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 
        watchdog_init_timeout(wdd, wdt_timeout, dev);
 
+       reg = wdt_read(wdt, AT91_WDT_MR);
+       if (!(reg & AT91_WDT_WDDIS)) {
+               wdt->mr &= ~AT91_WDT_WDDIS;
+               set_bit(WDOG_HW_RUNNING, &wdd->status);
+       }
+
        ret = sama5d4_wdt_init(wdt);
        if (ret)
                return ret;
index fd3cfdd..421ebcd 100644 (file)
 #include <linux/io.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/uaccess.h>
 #include <linux/watchdog.h>
index 8058fca..5f501b4 100644 (file)
@@ -8,7 +8,8 @@
 #include <linux/clk.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
 #include <linux/watchdog.h>
@@ -526,7 +527,6 @@ static void starfive_wdt_shutdown(struct platform_device *pdev)
        starfive_wdt_pm_stop(&wdt->wdd);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int starfive_wdt_suspend(struct device *dev)
 {
        struct starfive_wdt *wdt = dev_get_drvdata(dev);
@@ -556,9 +556,7 @@ static int starfive_wdt_resume(struct device *dev)
 
        return starfive_wdt_start(wdt);
 }
-#endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM
 static int starfive_wdt_runtime_suspend(struct device *dev)
 {
        struct starfive_wdt *wdt = dev_get_drvdata(dev);
@@ -574,11 +572,10 @@ static int starfive_wdt_runtime_resume(struct device *dev)
 
        return starfive_wdt_enable_clock(wdt);
 }
-#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops starfive_wdt_pm_ops = {
-       SET_RUNTIME_PM_OPS(starfive_wdt_runtime_suspend, starfive_wdt_runtime_resume, NULL)
-       SET_SYSTEM_SLEEP_PM_OPS(starfive_wdt_suspend, starfive_wdt_resume)
+       RUNTIME_PM_OPS(starfive_wdt_runtime_suspend, starfive_wdt_runtime_resume, NULL)
+       SYSTEM_SLEEP_PM_OPS(starfive_wdt_suspend, starfive_wdt_resume)
 };
 
 static const struct of_device_id starfive_wdt_match[] = {
@@ -594,7 +591,7 @@ static struct platform_driver starfive_wdt_driver = {
        .shutdown = starfive_wdt_shutdown,
        .driver = {
                .name = "starfive-wdt",
-               .pm = &starfive_wdt_pm_ops,
+               .pm = pm_ptr(&starfive_wdt_pm_ops),
                .of_match_table = starfive_wdt_match,
        },
 };
index 570a715..d9fd50d 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
@@ -288,7 +287,7 @@ static struct platform_driver stm32_iwdg_driver = {
        .probe          = stm32_iwdg_probe,
        .driver = {
                .name   = "iwdg",
-               .of_match_table = of_match_ptr(stm32_iwdg_of_match),
+               .of_match_table = stm32_iwdg_of_match,
        },
 };
 module_platform_driver(stm32_iwdg_driver);
index 6cf8292..b85354a 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
index d4c5a73..5b55cca 100644 (file)
@@ -161,7 +161,7 @@ static int watchdog_reboot_notifier(struct notifier_block *nb,
        struct watchdog_device *wdd;
 
        wdd = container_of(nb, struct watchdog_device, reboot_nb);
-       if (code == SYS_DOWN || code == SYS_HALT) {
+       if (code == SYS_DOWN || code == SYS_HALT || code == SYS_POWER_OFF) {
                if (watchdog_hw_running(wdd)) {
                        int ret;
 
index 2585038..d271e2e 100644 (file)
@@ -9,9 +9,10 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
+#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
+#include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
 /* Max timeout is calculated at 100MHz source clock */
@@ -71,7 +72,7 @@ static int xilinx_wwdt_start(struct watchdog_device *wdd)
 
        /* Calculate timeout count */
        time_out = xdev->freq * wdd->timeout;
-       closed_timeout = (time_out * xdev->close_percent) / 100;
+       closed_timeout = div_u64(time_out * xdev->close_percent, 100);
        open_timeout = time_out - closed_timeout;
        wdd->min_hw_heartbeat_ms = xdev->close_percent * 10 * wdd->timeout;
 
index 50c635d..1f77ca0 100644 (file)
@@ -12,3 +12,4 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
 
 ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
 ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o
+ceph-$(CONFIG_FS_ENCRYPTION) += crypto.o
index c91b293..c53a1d2 100644 (file)
@@ -140,7 +140,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
                newattrs.ia_ctime = current_time(inode);
                newattrs.ia_mode = new_mode;
                newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-               ret = __ceph_setattr(inode, &newattrs);
+               ret = __ceph_setattr(inode, &newattrs, NULL);
                if (ret)
                        goto out_free;
        }
@@ -151,7 +151,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
                        newattrs.ia_ctime = old_ctime;
                        newattrs.ia_mode = old_mode;
                        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-                       __ceph_setattr(inode, &newattrs);
+                       __ceph_setattr(inode, &newattrs, NULL);
                }
                goto out_free;
        }
index 59cbfb8..f486307 100644 (file)
@@ -18,6 +18,7 @@
 #include "mds_client.h"
 #include "cache.h"
 #include "metric.h"
+#include "crypto.h"
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/striper.h>
 
@@ -242,11 +243,13 @@ static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
 
 static void finish_netfs_read(struct ceph_osd_request *req)
 {
-       struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode);
+       struct inode *inode = req->r_inode;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
        struct netfs_io_subrequest *subreq = req->r_priv;
-       int num_pages;
+       struct ceph_osd_req_op *op = &req->r_ops[0];
        int err = req->r_result;
+       bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ);
 
        ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency,
                                 req->r_end_latency, osd_data->length, err);
@@ -260,14 +263,29 @@ static void finish_netfs_read(struct ceph_osd_request *req)
        else if (err == -EBLOCKLISTED)
                fsc->blocklisted = true;
 
-       if (err >= 0 && err < subreq->len)
-               __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+       if (err >= 0) {
+               if (sparse && err > 0)
+                       err = ceph_sparse_ext_map_end(op);
+               if (err < subreq->len)
+                       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+               if (IS_ENCRYPTED(inode) && err > 0) {
+                       err = ceph_fscrypt_decrypt_extents(inode,
+                                       osd_data->pages, subreq->start,
+                                       op->extent.sparse_ext,
+                                       op->extent.sparse_ext_cnt);
+                       if (err > subreq->len)
+                               err = subreq->len;
+               }
+       }
 
+       if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+               ceph_put_page_vector(osd_data->pages,
+                                    calc_pages_for(osd_data->alignment,
+                                       osd_data->length), false);
+       }
        netfs_subreq_terminated(subreq, err, false);
-
-       num_pages = calc_pages_for(osd_data->alignment, osd_data->length);
-       ceph_put_page_vector(osd_data->pages, num_pages, false);
        iput(req->r_inode);
+       ceph_dec_osd_stopping_blocker(fsc->mdsc);
 }
 
 static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
@@ -334,10 +352,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        struct ceph_osd_request *req = NULL;
        struct ceph_vino vino = ceph_vino(inode);
        struct iov_iter iter;
-       struct page **pages;
-       size_t page_off;
        int err = 0;
        u64 len = subreq->len;
+       bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
+       u64 off = subreq->start;
 
        if (ceph_inode_is_shutdown(inode)) {
                err = -EIO;
@@ -347,8 +365,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
                return;
 
-       req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
-                       0, 1, CEPH_OSD_OP_READ,
+       ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
+
+       req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
+                       off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
                        CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
                        NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
        if (IS_ERR(req)) {
@@ -357,20 +377,48 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
                goto out;
        }
 
+       if (sparse) {
+               err = ceph_alloc_sparse_ext_map(&req->r_ops[0]);
+               if (err)
+                       goto out;
+       }
+
        dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
+
        iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
-       err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
-       if (err < 0) {
-               dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
-               goto out;
-       }
 
-       /* should always give us a page-aligned read */
-       WARN_ON_ONCE(page_off);
-       len = err;
-       err = 0;
+       /*
+        * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
+        * encrypted inodes. We'd need infrastructure that handles an iov_iter
+        * instead of page arrays, and we don't have that as of yet. Once the
+        * dust settles on the write helpers and encrypt/decrypt routines for
+        * netfs, we should be able to rework this.
+        */
+       if (IS_ENCRYPTED(inode)) {
+               struct page **pages;
+               size_t page_off;
+
+               err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
+               if (err < 0) {
+                       dout("%s: iov_ter_get_pages_alloc returned %d\n",
+                            __func__, err);
+                       goto out;
+               }
+
+               /* should always give us a page-aligned read */
+               WARN_ON_ONCE(page_off);
+               len = err;
+               err = 0;
 
-       osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
+               osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
+                                                false);
+       } else {
+               osd_req_op_extent_osd_iter(req, 0, &iter);
+       }
+       if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
+               err = -EIO;
+               goto out;
+       }
        req->r_callback = finish_netfs_read;
        req->r_priv = subreq;
        req->r_inode = inode;
@@ -571,10 +619,12 @@ static u64 get_writepages_data_length(struct inode *inode,
                                      struct page *page, u64 start)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_snap_context *snapc = page_snap_context(page);
+       struct ceph_snap_context *snapc;
        struct ceph_cap_snap *capsnap = NULL;
        u64 end = i_size_read(inode);
+       u64 ret;
 
+       snapc = page_snap_context(ceph_fscrypt_pagecache_page(page));
        if (snapc != ci->i_head_snapc) {
                bool found = false;
                spin_lock(&ci->i_ceph_lock);
@@ -589,9 +639,12 @@ static u64 get_writepages_data_length(struct inode *inode,
                spin_unlock(&ci->i_ceph_lock);
                WARN_ON(!found);
        }
-       if (end > page_offset(page) + thp_size(page))
-               end = page_offset(page) + thp_size(page);
-       return end > start ? end - start : 0;
+       if (end > ceph_fscrypt_page_offset(page) + thp_size(page))
+               end = ceph_fscrypt_page_offset(page) + thp_size(page);
+       ret = end > start ? end - start : 0;
+       if (ret && fscrypt_is_bounce_page(page))
+               ret = round_up(ret, CEPH_FSCRYPT_BLOCK_SIZE);
+       return ret;
 }
 
 /*
@@ -610,10 +663,12 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        loff_t page_off = page_offset(page);
        int err;
        loff_t len = thp_size(page);
+       loff_t wlen;
        struct ceph_writeback_ctl ceph_wbc;
        struct ceph_osd_client *osdc = &fsc->client->osdc;
        struct ceph_osd_request *req;
        bool caching = ceph_is_cache_enabled(inode);
+       struct page *bounce_page = NULL;
 
        dout("writepage %p idx %lu\n", page, page->index);
 
@@ -649,31 +704,51 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        if (ceph_wbc.i_size < page_off + len)
                len = ceph_wbc.i_size - page_off;
 
+       wlen = IS_ENCRYPTED(inode) ? round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len;
        dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n",
-            inode, page, page->index, page_off, len, snapc, snapc->seq);
+            inode, page, page->index, page_off, wlen, snapc, snapc->seq);
 
        if (atomic_long_inc_return(&fsc->writeback_count) >
            CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
                fsc->write_congested = true;
 
-       req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
-                                   CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
-                                   ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
-                                   true);
+       req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode),
+                                   page_off, &wlen, 0, 1, CEPH_OSD_OP_WRITE,
+                                   CEPH_OSD_FLAG_WRITE, snapc,
+                                   ceph_wbc.truncate_seq,
+                                   ceph_wbc.truncate_size, true);
        if (IS_ERR(req)) {
                redirty_page_for_writepage(wbc, page);
                return PTR_ERR(req);
        }
 
+       if (wlen < len)
+               len = wlen;
+
        set_page_writeback(page);
        if (caching)
                ceph_set_page_fscache(page);
        ceph_fscache_write_to_cache(inode, page_off, len, caching);
 
+       if (IS_ENCRYPTED(inode)) {
+               bounce_page = fscrypt_encrypt_pagecache_blocks(page,
+                                                   CEPH_FSCRYPT_BLOCK_SIZE, 0,
+                                                   GFP_NOFS);
+               if (IS_ERR(bounce_page)) {
+                       redirty_page_for_writepage(wbc, page);
+                       end_page_writeback(page);
+                       ceph_osdc_put_request(req);
+                       return PTR_ERR(bounce_page);
+               }
+       }
+
        /* it may be a short write due to an object boundary */
        WARN_ON_ONCE(len > thp_size(page));
-       osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
-       dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
+       osd_req_op_extent_osd_data_pages(req, 0,
+                       bounce_page ? &bounce_page : &page, wlen, 0,
+                       false, false);
+       dout("writepage %llu~%llu (%llu bytes, %sencrypted)\n",
+            page_off, len, wlen, IS_ENCRYPTED(inode) ? "" : "not ");
 
        req->r_mtime = inode->i_mtime;
        ceph_osdc_start_request(osdc, req);
@@ -681,7 +756,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 
        ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
                                  req->r_end_latency, len, err);
-
+       fscrypt_free_bounce_page(bounce_page);
        ceph_osdc_put_request(req);
        if (err == 0)
                err = len;
@@ -800,6 +875,11 @@ static void writepages_finish(struct ceph_osd_request *req)
                total_pages += num_pages;
                for (j = 0; j < num_pages; j++) {
                        page = osd_data->pages[j];
+                       if (fscrypt_is_bounce_page(page)) {
+                               page = fscrypt_pagecache_page(page);
+                               fscrypt_free_bounce_page(osd_data->pages[j]);
+                               osd_data->pages[j] = page;
+                       }
                        BUG_ON(!page);
                        WARN_ON(!PageUptodate(page));
 
@@ -835,6 +915,7 @@ static void writepages_finish(struct ceph_osd_request *req)
        else
                kfree(osd_data->pages);
        ceph_osdc_put_request(req);
+       ceph_dec_osd_stopping_blocker(fsc->mdsc);
 }
 
 /*
@@ -1070,9 +1151,28 @@ get_more_pages:
                                    fsc->mount_options->congestion_kb))
                                fsc->write_congested = true;
 
-                       pages[locked_pages++] = page;
-                       fbatch.folios[i] = NULL;
+                       if (IS_ENCRYPTED(inode)) {
+                               pages[locked_pages] =
+                                       fscrypt_encrypt_pagecache_blocks(page,
+                                               PAGE_SIZE, 0,
+                                               locked_pages ? GFP_NOWAIT : GFP_NOFS);
+                               if (IS_ERR(pages[locked_pages])) {
+                                       if (PTR_ERR(pages[locked_pages]) == -EINVAL)
+                                               pr_err("%s: inode->i_blkbits=%hhu\n",
+                                                       __func__, inode->i_blkbits);
+                                       /* better not fail on first page! */
+                                       BUG_ON(locked_pages == 0);
+                                       pages[locked_pages] = NULL;
+                                       redirty_page_for_writepage(wbc, page);
+                                       unlock_page(page);
+                                       break;
+                               }
+                               ++locked_pages;
+                       } else {
+                               pages[locked_pages++] = page;
+                       }
 
+                       fbatch.folios[i] = NULL;
                        len += thp_size(page);
                }
 
@@ -1100,7 +1200,7 @@ get_more_pages:
                }
 
 new_request:
-               offset = page_offset(pages[0]);
+               offset = ceph_fscrypt_page_offset(pages[0]);
                len = wsize;
 
                req = ceph_osdc_new_request(&fsc->client->osdc,
@@ -1121,9 +1221,13 @@ new_request:
                                                ceph_wbc.truncate_size, true);
                        BUG_ON(IS_ERR(req));
                }
-               BUG_ON(len < page_offset(pages[locked_pages - 1]) +
-                            thp_size(page) - offset);
+               BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) +
+                            thp_size(pages[locked_pages - 1]) - offset);
 
+               if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
+                       rc = -EIO;
+                       goto release_folios;
+               }
                req->r_callback = writepages_finish;
                req->r_inode = inode;
 
@@ -1132,7 +1236,9 @@ new_request:
                data_pages = pages;
                op_idx = 0;
                for (i = 0; i < locked_pages; i++) {
-                       u64 cur_offset = page_offset(pages[i]);
+                       struct page *page = ceph_fscrypt_pagecache_page(pages[i]);
+
+                       u64 cur_offset = page_offset(page);
                        /*
                         * Discontinuity in page range? Ceph can handle that by just passing
                         * multiple extents in the write op.
@@ -1161,9 +1267,9 @@ new_request:
                                op_idx++;
                        }
 
-                       set_page_writeback(pages[i]);
+                       set_page_writeback(page);
                        if (caching)
-                               ceph_set_page_fscache(pages[i]);
+                               ceph_set_page_fscache(page);
                        len += thp_size(page);
                }
                ceph_fscache_write_to_cache(inode, offset, len, caching);
@@ -1179,8 +1285,16 @@ new_request:
                                                         offset);
                        len = max(len, min_len);
                }
+               if (IS_ENCRYPTED(inode))
+                       len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE);
+
                dout("writepages got pages at %llu~%llu\n", offset, len);
 
+               if (IS_ENCRYPTED(inode) &&
+                   ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK))
+                       pr_warn("%s: bad encrypted write offset=%lld len=%llu\n",
+                               __func__, offset, len);
+
                osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
                                                 0, from_pool, false);
                osd_req_op_extent_update(req, op_idx, len);
index 09cd6d3..14215ec 100644 (file)
@@ -14,6 +14,7 @@
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
+#include "crypto.h"
 #include <linux/ceph/decode.h>
 #include <linux/ceph/messenger.h>
 
@@ -1216,15 +1217,11 @@ struct cap_msg_args {
        umode_t                 mode;
        bool                    inline_data;
        bool                    wake;
+       bool                    encrypted;
+       u32                     fscrypt_auth_len;
+       u8                      fscrypt_auth[sizeof(struct ceph_fscrypt_auth)]; // for context
 };
 
-/*
- * cap struct size + flock buffer size + inline version + inline data size +
- * osd_epoch_barrier + oldest_flush_tid
- */
-#define CAP_MSG_SIZE (sizeof(struct ceph_mds_caps) + \
-                     4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4)
-
 /* Marshal up the cap msg to the MDS */
 static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
 {
@@ -1240,7 +1237,7 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
             arg->size, arg->max_size, arg->xattr_version,
             arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0);
 
-       msg->hdr.version = cpu_to_le16(10);
+       msg->hdr.version = cpu_to_le16(12);
        msg->hdr.tid = cpu_to_le64(arg->flush_tid);
 
        fc = msg->front.iov_base;
@@ -1257,7 +1254,13 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
        fc->ino = cpu_to_le64(arg->ino);
        fc->snap_follows = cpu_to_le64(arg->follows);
 
-       fc->size = cpu_to_le64(arg->size);
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       if (arg->encrypted)
+               fc->size = cpu_to_le64(round_up(arg->size,
+                                               CEPH_FSCRYPT_BLOCK_SIZE));
+       else
+#endif
+               fc->size = cpu_to_le64(arg->size);
        fc->max_size = cpu_to_le64(arg->max_size);
        ceph_encode_timespec64(&fc->mtime, &arg->mtime);
        ceph_encode_timespec64(&fc->atime, &arg->atime);
@@ -1311,6 +1314,27 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
 
        /* Advisory flags (version 10) */
        ceph_encode_32(&p, arg->flags);
+
+       /* dirstats (version 11) - these are r/o on the client */
+       ceph_encode_64(&p, 0);
+       ceph_encode_64(&p, 0);
+
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       /*
+        * fscrypt_auth and fscrypt_file (version 12)
+        *
+        * fscrypt_auth holds the crypto context (if any). fscrypt_file
+        * tracks the real i_size as an __le64 field (and we use a rounded-up
+        * i_size in the traditional size field).
+        */
+       ceph_encode_32(&p, arg->fscrypt_auth_len);
+       ceph_encode_copy(&p, arg->fscrypt_auth, arg->fscrypt_auth_len);
+       ceph_encode_32(&p, sizeof(__le64));
+       ceph_encode_64(&p, arg->size);
+#else /* CONFIG_FS_ENCRYPTION */
+       ceph_encode_32(&p, 0);
+       ceph_encode_32(&p, 0);
+#endif /* CONFIG_FS_ENCRYPTION */
 }
 
 /*
@@ -1378,7 +1402,6 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
        arg->follows = flushing ? ci->i_head_snapc->seq : 0;
        arg->flush_tid = flush_tid;
        arg->oldest_flush_tid = oldest_flush_tid;
-
        arg->size = i_size_read(inode);
        ci->i_reported_size = arg->size;
        arg->max_size = ci->i_wanted_max_size;
@@ -1432,8 +1455,39 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
                }
        }
        arg->flags = flags;
+       arg->encrypted = IS_ENCRYPTED(inode);
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       if (ci->fscrypt_auth_len &&
+           WARN_ON_ONCE(ci->fscrypt_auth_len > sizeof(struct ceph_fscrypt_auth))) {
+               /* Don't set this if it's too big */
+               arg->fscrypt_auth_len = 0;
+       } else {
+               arg->fscrypt_auth_len = ci->fscrypt_auth_len;
+               memcpy(arg->fscrypt_auth, ci->fscrypt_auth,
+                      min_t(size_t, ci->fscrypt_auth_len,
+                            sizeof(arg->fscrypt_auth)));
+       }
+#endif /* CONFIG_FS_ENCRYPTION */
 }
 
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
+                     4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4 + 8) /* the final "4 + 4 + 8" appears to cover: fscrypt_auth length, fscrypt_file length, __le64 file size */
+
+static inline int cap_msg_size(struct cap_msg_args *arg)
+{
+       return CAP_MSG_FIXED_FIELDS + arg->fscrypt_auth_len; /* fixed part plus the variable-length fscrypt_auth blob */
+}
+#else /* !CONFIG_FS_ENCRYPTION */
+#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
+                     4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4) /* the final "4 + 4" appears to cover the two fscrypt length fields */
+
+static inline int cap_msg_size(struct cap_msg_args *arg)
+{
+       return CAP_MSG_FIXED_FIELDS; /* no variable-length part; @arg is unused in this variant */
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
 /*
  * Send a cap msg on the given inode.
  *
@@ -1444,7 +1498,8 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
        struct ceph_msg *msg;
        struct inode *inode = &ci->netfs.inode;
 
-       msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
+       msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS,
+                          false);
        if (!msg) {
                pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n",
                       ceph_vinop(inode), ceph_cap_string(arg->dirty),
@@ -1470,10 +1525,6 @@ static inline int __send_flush_snap(struct inode *inode,
        struct cap_msg_args     arg;
        struct ceph_msg         *msg;
 
-       msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
-       if (!msg)
-               return -ENOMEM;
-
        arg.session = session;
        arg.ino = ceph_vino(inode).ino;
        arg.cid = 0;
@@ -1510,6 +1561,15 @@ static inline int __send_flush_snap(struct inode *inode,
        arg.inline_data = capsnap->inline_data;
        arg.flags = 0;
        arg.wake = false;
+       arg.encrypted = IS_ENCRYPTED(inode);
+
+       /* No fscrypt_auth changes from a capsnap. */
+       arg.fscrypt_auth_len = 0;
+
+       msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(&arg),
+                          GFP_NOFS, false);
+       if (!msg)
+               return -ENOMEM;
 
        encode_cap_msg(msg, &arg);
        ceph_con_send(&arg.session->s_con, msg);
@@ -2900,10 +2960,9 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
  * due to a small max_size, make sure we check_max_size (and possibly
  * ask the mds) so we don't get hung up indefinitely.
  */
-int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got)
+int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
+                   int want, loff_t endoff, int *got)
 {
-       struct ceph_file_info *fi = filp->private_data;
-       struct inode *inode = file_inode(filp);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        int ret, _got, flags;
@@ -2912,7 +2971,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
        if (ret < 0)
                return ret;
 
-       if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+       if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
            fi->filp_gen != READ_ONCE(fsc->filp_gen))
                return -EBADF;
 
@@ -2965,7 +3024,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
                                continue;
                }
 
-               if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+               if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
                    fi->filp_gen != READ_ONCE(fsc->filp_gen)) {
                        if (ret >= 0 && _got)
                                ceph_put_cap_refs(ci, _got);
@@ -3028,6 +3087,15 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
        return 0;
 }
 
+int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff,
+                 int *got)
+{
+       struct ceph_file_info *fi = filp->private_data; /* per-open ceph state attached to the file */
+       struct inode *inode = file_inode(filp);
+
+       return __ceph_get_caps(inode, fi, need, want, endoff, got); /* file-based wrapper: all the work is done by __ceph_get_caps() */
+}
+
 /*
  * Take cap refs.  Caller must already know we hold at least one ref
  * on the caps in question or we don't know this is safe.
@@ -3323,6 +3391,9 @@ struct cap_extra_info {
        /* currently issued */
        int issued;
        struct timespec64 btime;
+       u8 *fscrypt_auth;
+       u32 fscrypt_auth_len;
+       u64 fscrypt_file_size;
 };
 
 /*
@@ -3355,6 +3426,14 @@ static void handle_cap_grant(struct inode *inode,
        bool deleted_inode = false;
        bool fill_inline = false;
 
+       /*
+        * If there is at least one crypto block then we'll trust
+        * fscrypt_file_size. If the real length of the file is 0, then
+        * ignore it (it has probably been truncated down to 0 by the MDS).
+        */
+       if (IS_ENCRYPTED(inode) && size)
+               size = extra_info->fscrypt_file_size;
+
        dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
             inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
        dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
@@ -3421,6 +3500,14 @@ static void handle_cap_grant(struct inode *inode,
                dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
                     from_kuid(&init_user_ns, inode->i_uid),
                     from_kgid(&init_user_ns, inode->i_gid));
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+               if (ci->fscrypt_auth_len != extra_info->fscrypt_auth_len ||
+                   memcmp(ci->fscrypt_auth, extra_info->fscrypt_auth,
+                          ci->fscrypt_auth_len))
+                       pr_warn_ratelimited("%s: cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n",
+                               __func__, ci->fscrypt_auth_len,
+                               extra_info->fscrypt_auth_len);
+#endif
        }
 
        if ((newcaps & CEPH_CAP_LINK_SHARED) &&
@@ -3837,7 +3924,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
  */
 static bool handle_cap_trunc(struct inode *inode,
                             struct ceph_mds_caps *trunc,
-                            struct ceph_mds_session *session)
+                            struct ceph_mds_session *session,
+                            struct cap_extra_info *extra_info)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        int mds = session->s_mds;
@@ -3854,8 +3942,16 @@ static bool handle_cap_trunc(struct inode *inode,
 
        issued |= implemented | dirty;
 
-       dout("handle_cap_trunc inode %p mds%d seq %d to %lld seq %d\n",
-            inode, mds, seq, truncate_size, truncate_seq);
+       /*
+        * If there is at least one crypto block then we'll trust
+        * fscrypt_file_size. If the real length of the file is 0, then
+        * ignore it (it has probably been truncated down to 0 by the MDS).
+        */
+       if (IS_ENCRYPTED(inode) && size)
+               size = extra_info->fscrypt_file_size;
+
+       dout("%s inode %p mds%d seq %d to %lld truncate seq %d\n",
+            __func__, inode, mds, seq, truncate_size, truncate_seq);
        queue_trunc = ceph_fill_file_size(inode, issued,
                                          truncate_seq, truncate_size, size);
        return queue_trunc;
@@ -4075,6 +4171,52 @@ retry:
        *target_cap = cap;
 }
 
+#ifdef CONFIG_FS_ENCRYPTION
+static int parse_fscrypt_fields(void **p, void *end,
+                               struct cap_extra_info *extra)
+{
+       u32 len; /* length prefix of the fscrypt_file field */
+
+       ceph_decode_32_safe(p, end, extra->fscrypt_auth_len, bad); /* jumps to "bad" if the buffer is too short */
+       if (extra->fscrypt_auth_len) {
+               ceph_decode_need(p, end, extra->fscrypt_auth_len, bad); /* check the blob fits before allocating */
+               extra->fscrypt_auth = kmalloc(extra->fscrypt_auth_len,
+                                             GFP_KERNEL);
+               if (!extra->fscrypt_auth)
+                       return -ENOMEM;
+               ceph_decode_copy_safe(p, end, extra->fscrypt_auth,
+                                       extra->fscrypt_auth_len, bad); /* the buffer is not freed here on error; the caller is expected to kfree() it */
+       }
+
+       ceph_decode_32_safe(p, end, len, bad);
+       if (len >= sizeof(u64)) { /* only the leading u64 of fscrypt_file is interpreted */
+               ceph_decode_64_safe(p, end, extra->fscrypt_file_size, bad);
+               len -= sizeof(u64);
+       }
+       ceph_decode_skip_n(p, end, len, bad); /* skip any remaining fscrypt_file bytes */
+       return 0;
+bad:
+       return -EIO; /* truncated or malformed message */
+}
+#else /* !CONFIG_FS_ENCRYPTION */
+static int parse_fscrypt_fields(void **p, void *end,
+                               struct cap_extra_info *extra)
+{
+       u32 len;
+
+       /* Don't care about these fields unless we're encryption-capable */
+       ceph_decode_32_safe(p, end, len, bad); /* fscrypt_auth length */
+       if (len)
+               ceph_decode_skip_n(p, end, len, bad);
+       ceph_decode_32_safe(p, end, len, bad); /* fscrypt_file length */
+       if (len)
+               ceph_decode_skip_n(p, end, len, bad);
+       return 0;
+bad:
+       return -EIO; /* truncated or malformed message */
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
 /*
  * Handle a caps message from the MDS.
  *
@@ -4105,6 +4247,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
        dout("handle_caps from mds%d\n", session->s_mds);
 
+       if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+               return;
+
        /* decode */
        end = msg->front.iov_base + msg->front.iov_len;
        if (msg->front.iov_len < sizeof(*h))
@@ -4195,13 +4340,17 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                ceph_decode_64_safe(&p, end, extra_info.nsubdirs, bad);
        }
 
+       if (msg_version >= 12) {
+               if (parse_fscrypt_fields(&p, end, &extra_info))
+                       goto bad;
+       }
+
        /* lookup ino */
        inode = ceph_find_inode(mdsc->fsc->sb, vino);
        dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
             vino.snap, inode);
 
        mutex_lock(&session->s_mutex);
-       inc_session_sequence(session);
        dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
             (unsigned)seq);
 
@@ -4292,7 +4441,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                break;
 
        case CEPH_CAP_OP_TRUNC:
-               queue_trunc = handle_cap_trunc(inode, h, session);
+               queue_trunc = handle_cap_trunc(inode, h, session,
+                                               &extra_info);
                spin_unlock(&ci->i_ceph_lock);
                if (queue_trunc)
                        ceph_queue_vmtruncate(inode);
@@ -4309,12 +4459,15 @@ done:
 done_unlocked:
        iput(inode);
 out:
+       ceph_dec_mds_stopping_blocker(mdsc);
+
        ceph_put_string(extra_info.pool_ns);
 
        /* Defer closing the sessions after s_mutex lock being released */
        if (close_sessions)
                ceph_mdsc_close_sessions(mdsc);
 
+       kfree(extra_info.fscrypt_auth);
        return;
 
 flush_cap_releases:
@@ -4611,6 +4764,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
        return ret;
 }
 
+/**
+ * ceph_encode_dentry_release - encode a dentry release into an outgoing request
+ * @p: outgoing request buffer
+ * @dentry: dentry to release
+ * @dir: dir to release it from
+ * @mds: mds that we're speaking to
+ * @drop: caps being dropped
+ * @unless: unless we have these caps
+ *
+ * Encode a dentry release into an outgoing request buffer. Returns 1 if the
+ * thing was released, or a negative error code otherwise.
+ */
 int ceph_encode_dentry_release(void **p, struct dentry *dentry,
                               struct inode *dir,
                               int mds, int drop, int unless)
@@ -4643,13 +4808,25 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
        if (ret && di->lease_session && di->lease_session->s_mds == mds) {
                dout("encode_dentry_release %p mds%d seq %d\n",
                     dentry, mds, (int)di->lease_seq);
-               rel->dname_len = cpu_to_le32(dentry->d_name.len);
-               memcpy(*p, dentry->d_name.name, dentry->d_name.len);
-               *p += dentry->d_name.len;
                rel->dname_seq = cpu_to_le32(di->lease_seq);
                __ceph_mdsc_drop_dentry_lease(dentry);
+               spin_unlock(&dentry->d_lock);
+               if (IS_ENCRYPTED(dir) && fscrypt_has_encryption_key(dir)) {
+                       int ret2 = ceph_encode_encrypted_fname(dir, dentry, *p);
+
+                       if (ret2 < 0)
+                               return ret2;
+
+                       rel->dname_len = cpu_to_le32(ret2);
+                       *p += ret2;
+               } else {
+                       rel->dname_len = cpu_to_le32(dentry->d_name.len);
+                       memcpy(*p, dentry->d_name.name, dentry->d_name.len);
+                       *p += dentry->d_name.len;
+               }
+       } else {
+               spin_unlock(&dentry->d_lock);
        }
-       spin_unlock(&dentry->d_lock);
        return ret;
 }
 
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
new file mode 100644 (file)
index 0000000..e4d5cd5
--- /dev/null
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The base64 encode/decode code was copied from fscrypt:
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, Motorola Mobility
+ * Written by Uday Savagaonkar, 2014.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+#include <linux/ceph/ceph_debug.h>
+#include <linux/xattr.h>
+#include <linux/fscrypt.h>
+#include <linux/ceph/striper.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "crypto.h"
+
+/*
+ * The base64url encoding used by fscrypt includes the '_' character, which may
+ * cause problems in snapshot names (which cannot start with '_').  Thus, we
+ * use the base64 encoding defined for IMAP mailbox names (RFC 3501) instead,
+ * which replaces '-' and '_' by '+' and ','.
+ */
+static const char base64_table[65] =
+       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+int ceph_base64_encode(const u8 *src, int srclen, char *dst) /* encodes srclen bytes of src into dst; returns the number of chars written */
+{
+       u32 ac = 0; /* bit accumulator */
+       int bits = 0; /* number of bits in ac not yet emitted */
+       int i;
+       char *cp = dst; /* output cursor */
+
+       for (i = 0; i < srclen; i++) {
+               ac = (ac << 8) | src[i];
+               bits += 8;
+               do { /* emit every complete 6-bit group currently held in ac */
+                       bits -= 6;
+                       *cp++ = base64_table[(ac >> bits) & 0x3f];
+               } while (bits >= 6);
+       }
+       if (bits) /* leftover bits are zero-padded on the right; no '=' padding is written */
+               *cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
+       return cp - dst; /* dst is not NUL-terminated by this function */
+}
+
+int ceph_base64_decode(const char *src, int srclen, u8 *dst) /* decodes srclen chars of src into dst; returns bytes written or -1 */
+{
+       u32 ac = 0; /* bit accumulator */
+       int bits = 0; /* number of bits in ac not yet emitted */
+       int i;
+       u8 *bp = dst; /* output cursor */
+
+       for (i = 0; i < srclen; i++) {
+               const char *p = strchr(base64_table, src[i]);
+
+               if (p == NULL || src[i] == 0) /* strchr() also matches the table's terminating NUL, so reject 0 explicitly */
+                       return -1;
+               ac = (ac << 6) | (p - base64_table);
+               bits += 6;
+               if (bits >= 8) { /* a full byte is available */
+                       bits -= 8;
+                       *bp++ = (u8)(ac >> bits);
+               }
+       }
+       if (ac & ((1 << bits) - 1)) /* leftover padding bits must all be zero */
+               return -1;
+       return bp - dst;
+}
+
+static int ceph_crypt_get_context(struct inode *inode, void *ctx, size_t len) /* copies the stored context blob into ctx; returns its length or a negative errno */
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_fscrypt_auth *cfa = (struct ceph_fscrypt_auth *)ci->fscrypt_auth;
+       u32 ctxlen; /* blob length as recorded in the cfa header */
+
+       /* No fscrypt_auth at all, or too short to hold even one blob byte? */
+       if (!cfa || (ci->fscrypt_auth_len < (offsetof(struct ceph_fscrypt_auth, cfa_blob) + 1)))
+               return -ENOBUFS;
+
+       /* Some format we don't recognize? */
+       if (le32_to_cpu(cfa->cfa_version) != CEPH_FSCRYPT_AUTH_VERSION)
+               return -ENOBUFS;
+
+       ctxlen = le32_to_cpu(cfa->cfa_blob_len);
+       if (len < ctxlen) /* caller's buffer is too small */
+               return -ERANGE;
+
+       memcpy(ctx, cfa->cfa_blob, ctxlen); /* NOTE(review): ctxlen is not checked against ci->fscrypt_auth_len here - confirm it is validated elsewhere */
+       return ctxlen;
+}
+
+static int ceph_crypt_set_context(struct inode *inode, const void *ctx,
+                                 size_t len, void *fs_data) /* wraps ctx in a ceph_fscrypt_auth and applies it via __ceph_setattr() */
+{
+       int ret;
+       struct iattr attr = { }; /* left empty: only the ceph-specific attribute below is set */
+       struct ceph_iattr cia = { };
+       struct ceph_fscrypt_auth *cfa;
+
+       WARN_ON_ONCE(fs_data); /* fs_data is not used by this implementation */
+
+       if (len > FSCRYPT_SET_CONTEXT_MAX_SIZE)
+               return -EINVAL;
+
+       cfa = kzalloc(sizeof(*cfa), GFP_KERNEL);
+       if (!cfa)
+               return -ENOMEM;
+
+       cfa->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION);
+       cfa->cfa_blob_len = cpu_to_le32(len);
+       memcpy(cfa->cfa_blob, ctx, len); /* len was bounded by FSCRYPT_SET_CONTEXT_MAX_SIZE above */
+
+       cia.fscrypt_auth = cfa;
+
+       ret = __ceph_setattr(inode, &attr, &cia);
+       if (ret == 0)
+               inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED); /* mark the in-core inode encrypted only on success */
+       kfree(cia.fscrypt_auth); /* frees via cia, not cfa - presumably __ceph_setattr() may take ownership and clear it; TODO confirm */
+       return ret;
+}
+
+static bool ceph_crypt_empty_dir(struct inode *inode) /* fscrypt ->empty_dir callback */
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       return ci->i_rsubdirs + ci->i_rfiles == 1; /* presumably the recursive counts include the directory itself, so 1 means empty - TODO confirm */
+}
+
+static const union fscrypt_policy *ceph_get_dummy_policy(struct super_block *sb) /* fscrypt ->get_dummy_policy callback */
+{
+       return ceph_sb_to_client(sb)->fsc_dummy_enc_policy.policy; /* policy stored on the per-superblock ceph client */
+}
+
+static struct fscrypt_operations ceph_fscrypt_ops = { /* installed on a superblock by ceph_fscrypt_set_ops() */
+       .get_context            = ceph_crypt_get_context,
+       .set_context            = ceph_crypt_set_context,
+       .get_dummy_policy       = ceph_get_dummy_policy,
+       .empty_dir              = ceph_crypt_empty_dir,
+};
+
+void ceph_fscrypt_set_ops(struct super_block *sb) /* registers the ceph fscrypt callbacks on @sb */
+{
+       fscrypt_set_ops(sb, &ceph_fscrypt_ops);
+}
+
+void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc) /* releases the dummy encryption policy held by @fsc */
+{
+       fscrypt_free_dummy_policy(&fsc->fsc_dummy_enc_policy);
+}
+
+int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode,
+                                struct ceph_acl_sec_ctx *as) /* builds the fscrypt_auth for a new inode; returns 0 or a negative errno */
+{
+       int ret, ctxsize;
+       bool encrypted = false; /* set by fscrypt_prepare_new_inode() */
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       ret = fscrypt_prepare_new_inode(dir, inode, &encrypted);
+       if (ret)
+               return ret;
+       if (!encrypted) /* nothing to prepare for an unencrypted inode */
+               return 0;
+
+       as->fscrypt_auth = kzalloc(sizeof(*as->fscrypt_auth), GFP_KERNEL);
+       if (!as->fscrypt_auth)
+               return -ENOMEM;
+
+       ctxsize = fscrypt_context_for_new_inode(as->fscrypt_auth->cfa_blob,
+                                               inode);
+       if (ctxsize < 0) /* as->fscrypt_auth is not freed here; presumably the caller releases @as - TODO confirm */
+               return ctxsize;
+
+       as->fscrypt_auth->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION);
+       as->fscrypt_auth->cfa_blob_len = cpu_to_le32(ctxsize);
+
+       WARN_ON_ONCE(ci->fscrypt_auth); /* a new inode is not expected to carry an auth blob yet */
+       kfree(ci->fscrypt_auth);
+       ci->fscrypt_auth_len = ceph_fscrypt_auth_len(as->fscrypt_auth);
+       ci->fscrypt_auth = kmemdup(as->fscrypt_auth, ci->fscrypt_auth_len,
+                                  GFP_KERNEL); /* the inode keeps its own copy; @as keeps the original */
+       if (!ci->fscrypt_auth)
+               return -ENOMEM;
+
+       inode->i_flags |= S_ENCRYPTED;
+
+       return 0;
+}
+
+void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
+                               struct ceph_acl_sec_ctx *as) /* hands the prepared fscrypt_auth over to the MDS request */
+{
+       swap(req->r_fscrypt_auth, as->fscrypt_auth); /* exchanges the two pointers; nothing is copied or freed here */
+}
+
+/*
+ * User-created snapshots can't start with '_'.  Snapshots that start with this
+ * character are special (hint: these aren't real snapshots) and use the
+ * following format:
+ *
+ *   _<SNAPSHOT-NAME>_<INODE-NUMBER>
+ *
+ * where:
+ *  - <SNAPSHOT-NAME> - the real snapshot name that may need to be decrypted,
+ *  - <INODE-NUMBER> - the inode number (in decimal) for the actual snapshot
+ *
+ * This function parses such a name and returns the inode for <INODE-NUMBER>
+ * (or an ERR_PTR).  On entry '*name_len' is the length of the whole name; on
+ * return it holds the <SNAPSHOT-NAME> length.
+ */
+static struct inode *parse_longname(const struct inode *parent,
+                                   const char *name, int *name_len)
+{
+       struct inode *dir = NULL;
+       struct ceph_vino vino = { .snap = CEPH_NOSNAP };
+       char *inode_number; /* NUL-terminated copy of the <INODE-NUMBER> digits */
+       char *name_end; /* points at the '_' separating name and inode number */
+       int orig_len = *name_len;
+       int ret = -EIO;
+
+       /* Skip initial '_' */
+       name++;
+       name_end = strrchr(name, '_'); /* NOTE(review): relies on name being NUL-terminated - confirm for all callers */
+       if (!name_end) {
+               dout("Failed to parse long snapshot name: %s\n", name);
+               return ERR_PTR(-EIO);
+       }
+       *name_len = (name_end - name);
+       if (*name_len <= 0 || orig_len - *name_len - 2 <= 0) { /* empty name, or no room left for an inode number */
+               pr_err("Failed to parse long snapshot name\n");
+               return ERR_PTR(-EIO);
+       }
+
+       /* Get the inode number */
+       inode_number = kmemdup_nul(name_end + 1,
+                                  orig_len - *name_len - 2, /* total minus name and the two '_' */
+                                  GFP_KERNEL);
+       if (!inode_number)
+               return ERR_PTR(-ENOMEM);
+       ret = kstrtou64(inode_number, 10, &vino.ino);
+       if (ret) {
+               dout("Failed to parse inode number: %s\n", name);
+               dir = ERR_PTR(ret);
+               goto out;
+       }
+
+       /* And finally the inode */
+       dir = ceph_find_inode(parent->i_sb, vino);
+       if (!dir) {
+               /* This can happen if we're not mounting cephfs on the root */
+               dir = ceph_get_inode(parent->i_sb, vino, NULL);
+               if (!dir)
+                       dir = ERR_PTR(-ENOENT);
+       }
+       if (IS_ERR(dir))
+               dout("Can't find inode %s (%s)\n", inode_number, name);
+
+out:
+       kfree(inode_number);
+       return dir; /* on success the caller owns a reference (callers here drop it with iput/discard_new_inode) */
+}
+
+int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
+                               char *buf) /* writes the on-the-wire name into buf; returns its length or a negative errno */
+{
+       struct inode *dir = parent; /* replaced by the snapshot's real dir for "_name_ino" long names */
+       struct qstr iname; /* cleartext name to encrypt */
+       u32 len; /* ciphertext length */
+       int name_len;
+       int elen; /* encoded length, doubles as the return value */
+       int ret;
+       u8 *cryptbuf = NULL;
+
+       iname.name = d_name->name;
+       name_len = d_name->len;
+
+       /* Handle the special case of snapshot names that start with '_' */
+       if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) &&
+           (iname.name[0] == '_')) {
+               dir = parse_longname(parent, iname.name, &name_len);
+               if (IS_ERR(dir))
+                       return PTR_ERR(dir);
+               iname.name++; /* skip initial '_' */
+       }
+       iname.len = name_len;
+
+       if (!fscrypt_has_encryption_key(dir)) { /* no key: pass the original name through unchanged */
+               memcpy(buf, d_name->name, d_name->len);
+               elen = d_name->len;
+               goto out;
+       }
+
+       /*
+        * Convert cleartext d_name to ciphertext. If result is longer than
+        * CEPH_NOHASH_NAME_MAX, sha256 the remaining bytes
+        *
+        * See: fscrypt_setup_filename
+        */
+       if (!fscrypt_fname_encrypted_size(dir, iname.len, NAME_MAX, &len)) {
+               elen = -ENAMETOOLONG;
+               goto out;
+       }
+
+       /* Allocate a buffer appropriate to hold the result */
+       cryptbuf = kmalloc(len > CEPH_NOHASH_NAME_MAX ? NAME_MAX : len,
+                          GFP_KERNEL);
+       if (!cryptbuf) {
+               elen = -ENOMEM;
+               goto out;
+       }
+
+       ret = fscrypt_fname_encrypt(dir, &iname, cryptbuf, len);
+       if (ret) {
+               elen = ret;
+               goto out;
+       }
+
+       /* hash the end if the name is long enough */
+       if (len > CEPH_NOHASH_NAME_MAX) {
+               u8 hash[SHA256_DIGEST_SIZE];
+               u8 *extra = cryptbuf + CEPH_NOHASH_NAME_MAX;
+
+               /*
+                * hash the extra bytes and overwrite crypttext beyond that
+                * point with it
+                */
+               sha256(extra, len - CEPH_NOHASH_NAME_MAX, hash);
+               memcpy(extra, hash, SHA256_DIGEST_SIZE);
+               len = CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE;
+       }
+
+       /* base64 encode the encrypted name */
+       elen = ceph_base64_encode(cryptbuf, len, buf);
+       dout("base64-encoded ciphertext name = %.*s\n", elen, buf);
+
+       /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */
+       WARN_ON(elen > 240);
+       if ((elen > 0) && (dir != parent)) { /* long snapshot name: rebuild the "_<name>_<ino>" form */
+               char tmp_buf[NAME_MAX];
+
+               elen = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%lu", /* i_ino is unsigned long */
+                               elen, buf, dir->i_ino);
+               memcpy(buf, tmp_buf, elen); /* NOTE(review): snprintf() returns the untruncated length - confirm it cannot exceed sizeof(tmp_buf) */
+       }
+
+out:
+       kfree(cryptbuf);
+       if (dir != parent) { /* drop the reference obtained through parse_longname() */
+               if ((dir->i_state & I_NEW))
+                       discard_new_inode(dir);
+               else
+                       iput(dir);
+       }
+       return elen;
+}
+
+int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry,
+                               char *buf) /* dentry-based wrapper around ceph_encode_encrypted_dname() */
+{
+       WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)); /* warns once but still proceeds if the key is missing */
+
+       return ceph_encode_encrypted_dname(parent, &dentry->d_name, buf);
+}
+
+/**
+ * ceph_fname_to_usr - convert a filename for userland presentation
+ * @fname: ceph_fname to be converted
+ * @tname: temporary name buffer to use for conversion (may be NULL)
+ * @oname: where converted name should be placed
+ * @is_nokey: set to true if key wasn't available during conversion (may be NULL)
+ *
+ * Given a filename (usually from the MDS), format it for presentation to
+ * userland. If the directory (fname->dir) is not encrypted, pass it back as-is.
+ *
+ * Otherwise, base64 decode the string, and then ask fscrypt to format it
+ * for userland presentation.
+ *
+ * Returns 0 on success or negative error code on error.
+ */
+int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
+                     struct fscrypt_str *oname, bool *is_nokey)
+{
+       struct inode *dir = fname->dir; /* replaced by the snapshot's real dir for "_name_ino" long names */
+       struct fscrypt_str _tname = FSTR_INIT(NULL, 0); /* local scratch buffer, used only when @tname is NULL */
+       struct fscrypt_str iname; /* ciphertext handed to fscrypt */
+       char *name = fname->name;
+       int name_len = fname->name_len;
+       int ret;
+
+       /* Sanity check that the resulting name will fit in the buffer */
+       if (fname->name_len > NAME_MAX || fname->ctext_len > NAME_MAX)
+               return -EIO;
+
+       /* Handle the special case of snapshot names that start with '_' */
+       if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) &&
+           (name[0] == '_')) {
+               dir = parse_longname(dir, name, &name_len);
+               if (IS_ERR(dir))
+                       return PTR_ERR(dir);
+               name++; /* skip initial '_' */
+       }
+
+       if (!IS_ENCRYPTED(dir)) { /* unencrypted: hand back the caller's name without copying */
+               oname->name = fname->name;
+               oname->len = fname->name_len;
+               ret = 0;
+               goto out_inode;
+       }
+
+       ret = ceph_fscrypt_prepare_readdir(dir);
+       if (ret < 0) /* 1 only means "dir just got unlocked", which is not an error */
+               goto out_inode;
+
+       /*
+        * Use the raw dentry name as sent by the MDS instead of
+        * generating a nokey name via fscrypt.
+        */
+       if (!fscrypt_has_encryption_key(dir)) {
+               if (fname->no_copy)
+                       oname->name = fname->name;
+               else
+                       memcpy(oname->name, fname->name, fname->name_len);
+               oname->len = fname->name_len;
+               if (is_nokey)
+                       *is_nokey = true;
+               ret = 0;
+               goto out_inode;
+       }
+
+       if (fname->ctext_len == 0) { /* no separate ciphertext: the name itself is base64-encoded ciphertext */
+               int declen;
+
+               if (!tname) {
+                       ret = fscrypt_fname_alloc_buffer(NAME_MAX, &_tname);
+                       if (ret)
+                               goto out_inode;
+                       tname = &_tname;
+               }
+
+               declen = ceph_base64_decode(name, name_len, tname->name);
+               if (declen <= 0) {
+                       ret = -EIO;
+                       goto out;
+               }
+               iname.name = tname->name;
+               iname.len = declen;
+       } else {
+               iname.name = fname->ctext;
+               iname.len = fname->ctext_len;
+       }
+
+       ret = fscrypt_fname_disk_to_usr(dir, 0, 0, &iname, oname);
+       if (!ret && (dir != fname->dir)) { /* long snapshot name: rebuild the "_<name>_<ino>" form */
+               char tmp_buf[CEPH_BASE64_CHARS(NAME_MAX)];
+
+               name_len = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%lu", /* i_ino is unsigned long */
+                                   oname->len, oname->name, dir->i_ino);
+               memcpy(oname->name, tmp_buf, name_len);
+               oname->len = name_len;
+       }
+
+out:
+       fscrypt_fname_free_buffer(&_tname);
+out_inode:
+       if ((dir != fname->dir) && !IS_ERR(dir)) { /* drop the reference obtained through parse_longname() */
+               if ((dir->i_state & I_NEW))
+                       discard_new_inode(dir);
+               else
+                       iput(dir);
+       }
+       return ret;
+}
+
+/**
+ * ceph_fscrypt_prepare_readdir - simple __fscrypt_prepare_readdir() wrapper
+ * @dir: directory inode for readdir prep
+ *
+ * Simple wrapper around __fscrypt_prepare_readdir() that will mark directory as
+ * non-complete if this call results in having the directory unlocked.
+ *
+ * Returns:
+ *     1 - if directory was locked and key is now loaded (i.e. dir is unlocked)
+ *     0 - otherwise (dir not encrypted, already unlocked, or still locked)
+ *   < 0 - if __fscrypt_prepare_readdir() fails
+ */
+int ceph_fscrypt_prepare_readdir(struct inode *dir)
+{
+       bool had_key = fscrypt_has_encryption_key(dir); /* sampled before the prepare call to detect a transition */
+       int err;
+
+       if (!IS_ENCRYPTED(dir))
+               return 0;
+
+       err = __fscrypt_prepare_readdir(dir);
+       if (err)
+               return err;
+       if (!had_key && fscrypt_has_encryption_key(dir)) {
+               /* directory just got unlocked, mark it as not complete */
+               ceph_dir_clear_complete(dir);
+               return 1;
+       }
+       return 0;
+}
+
+int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode,
+                                 struct page *page, unsigned int len,
+                                 unsigned int offs, u64 lblk_num) /* debug-logging wrapper around fscrypt_decrypt_block_inplace() */
+{
+       dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num);
+       return fscrypt_decrypt_block_inplace(inode, page, len, offs, lblk_num);
+}
+
+int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode,
+                                 struct page *page, unsigned int len,
+                                 unsigned int offs, u64 lblk_num,
+                                 gfp_t gfp_flags) /* debug-logging wrapper around fscrypt_encrypt_block_inplace() */
+{
+       dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num);
+       return fscrypt_encrypt_block_inplace(inode, page, len, offs, lblk_num,
+                                            gfp_flags);
+}
+
+/**
+ * ceph_fscrypt_decrypt_pages - decrypt an array of pages
+ * @inode: pointer to inode associated with these pages
+ * @page: pointer to page array
+ * @off: offset into the file that the read data starts
+ * @len: max length to decrypt
+ *
+ * Decrypt an array of fscrypt'ed pages and return the amount of
+ * data decrypted. Any data in the page prior to the start of the
+ * first complete block in the read is ignored. Any incomplete
+ * crypto blocks at the end of the array are ignored (and should
+ * probably be zeroed by the caller).
+ *
+ * Returns the length of the decrypted data or a negative errno.
+ */
+int ceph_fscrypt_decrypt_pages(struct inode *inode, struct page **page,
+                              u64 off, int len)
+{
+       int i, num_blocks;
+       u64 baseblk = off >> CEPH_FSCRYPT_BLOCK_SHIFT; /* logical block number of the first block */
+       int ret = 0; /* bytes decrypted so far, or the first error */
+
+       /*
+        * We can't deal with partial blocks on an encrypted file, so mask off
+        * any partial trailing block.
+        */
+       num_blocks = ceph_fscrypt_blocks(off, len & CEPH_FSCRYPT_BLOCK_MASK);
+
+       /* Decrypt each block */
+       for (i = 0; i < num_blocks; ++i) {
+               int blkoff = i << CEPH_FSCRYPT_BLOCK_SHIFT; /* byte offset of block i within the page array */
+               int pgidx = blkoff >> PAGE_SHIFT;
+               unsigned int pgoffs = offset_in_page(blkoff);
+               int fret;
+
+               fret = ceph_fscrypt_decrypt_block_inplace(inode, page[pgidx],
+                               CEPH_FSCRYPT_BLOCK_SIZE, pgoffs,
+                               baseblk + i);
+               if (fret < 0) {
+                       if (ret == 0) /* report the error only if nothing was decrypted yet */
+                               ret = fret;
+                       break; /* otherwise return the bytes decrypted before the failure */
+               }
+               ret += CEPH_FSCRYPT_BLOCK_SIZE;
+       }
+       return ret;
+}
+
+/**
+ * ceph_fscrypt_decrypt_extents: decrypt received extents in given buffer
+ * @inode: inode associated with pages being decrypted
+ * @page: pointer to page array
+ * @off: offset into the file that the data in page[0] starts
+ * @map: pointer to extent array
+ * @ext_cnt: length of extent array
+ *
+ * Given an extent map and a page array, decrypt the received data in-place,
+ * skipping holes. Returns the offset into buffer of end of last decrypted
+ * block.
+ */
+int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page,
+                                u64 off, struct ceph_sparse_extent *map,
+                                u32 ext_cnt)
+{
+       int i, ret = 0;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       u64 objno, objoff;
+       u32 xlen;
+
+       /* Nothing to do for empty array */
+       if (ext_cnt == 0) {
+               dout("%s: empty array, ret 0\n", __func__);
+               return 0;
+       }
+
+       ceph_calc_file_object_mapping(&ci->i_layout, off, map[0].len,
+                                     &objno, &objoff, &xlen);
+
+       for (i = 0; i < ext_cnt; ++i) {
+               struct ceph_sparse_extent *ext = &map[i];
+               int pgsoff = ext->off - objoff;
+               int pgidx = pgsoff >> PAGE_SHIFT;
+               int fret;
+
+               if ((ext->off | ext->len) & ~CEPH_FSCRYPT_BLOCK_MASK) {
+                       pr_warn("%s: bad encrypted sparse extent idx %d off %llx len %llx\n",
+                               __func__, i, ext->off, ext->len);
+                       return -EIO;
+               }
+               fret = ceph_fscrypt_decrypt_pages(inode, &page[pgidx],
+                                                off + pgsoff, ext->len);
+               dout("%s: [%d] 0x%llx~0x%llx fret %d\n", __func__, i,
+                               ext->off, ext->len, fret);
+               if (fret < 0) {
+                       if (ret == 0)
+                               ret = fret;
+                       break;
+               }
+               ret = pgsoff + fret;
+       }
+       dout("%s: ret %d\n", __func__, ret);
+       return ret;
+}
+
+/**
+ * ceph_fscrypt_encrypt_pages - encrypt an array of pages
+ * @inode: pointer to inode associated with these pages
+ * @page: pointer to page array
+ * @off: offset into the file that the data starts
+ * @len: max length to encrypt
+ * @gfp: gfp flags to use for allocation
+ *
+ * Encrypt an array of cleartext pages and return the amount of
+ * data encrypted. Any data in the page prior to the start of the
+ * first complete block in the read is ignored. Any incomplete
+ * crypto blocks at the end of the array are ignored.
+ *
+ * Returns the length of the encrypted data or a negative errno.
+ */
+int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **page, u64 off,
+                               int len, gfp_t gfp)
+{
+       int i, num_blocks;
+       u64 baseblk = off >> CEPH_FSCRYPT_BLOCK_SHIFT;
+       int ret = 0;
+
+       /*
+        * We can't deal with partial blocks on an encrypted file, so round
+        * len down to a whole number of crypto blocks.
+        */
+       num_blocks = ceph_fscrypt_blocks(off, len & CEPH_FSCRYPT_BLOCK_MASK);
+
+       /* Encrypt each block */
+       for (i = 0; i < num_blocks; ++i) {
+               int blkoff = i << CEPH_FSCRYPT_BLOCK_SHIFT;
+               int pgidx = blkoff >> PAGE_SHIFT;
+               unsigned int pgoffs = offset_in_page(blkoff);
+               int fret;
+
+               fret = ceph_fscrypt_encrypt_block_inplace(inode, page[pgidx],
+                               CEPH_FSCRYPT_BLOCK_SIZE, pgoffs,
+                               baseblk + i, gfp);
+               if (fret < 0) {
+                       if (ret == 0)
+                               ret = fret;
+                       break;
+               }
+               ret += CEPH_FSCRYPT_BLOCK_SIZE;
+       }
+       return ret;
+}
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h
new file mode 100644 (file)
index 0000000..47e0c31
--- /dev/null
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ceph fscrypt functionality
+ */
+
+#ifndef _CEPH_CRYPTO_H
+#define _CEPH_CRYPTO_H
+
+#include <crypto/sha2.h>
+#include <linux/fscrypt.h>
+
+#define CEPH_FSCRYPT_BLOCK_SHIFT   12
+#define CEPH_FSCRYPT_BLOCK_SIZE    (_AC(1, UL) << CEPH_FSCRYPT_BLOCK_SHIFT)
+#define CEPH_FSCRYPT_BLOCK_MASK           (~(CEPH_FSCRYPT_BLOCK_SIZE-1))
+
+struct ceph_fs_client;
+struct ceph_acl_sec_ctx;
+struct ceph_mds_request;
+
+struct ceph_fname {
+       struct inode    *dir;
+       char            *name;          // b64 encoded, possibly hashed
+       unsigned char   *ctext;         // binary crypttext (if any)
+       u32             name_len;       // length of name buffer
+       u32             ctext_len;      // length of crypttext
+       bool            no_copy;
+};
+
+/*
+ * Header sent to the MDS when truncating the size of an encrypted
+ * file. The MDS will update the encrypted last block and then
+ * truncate the size.
+ */
+struct ceph_fscrypt_truncate_size_header {
+       __u8  ver;
+       __u8  compat;
+
+       /*
+        * This is the combined size of change_attr, file_offset and
+        * block_size if the last block is empty because it's located in a
+        * file hole. Otherwise data_len also includes CEPH_FSCRYPT_BLOCK_SIZE.
+        */
+       __le32 data_len;
+
+       __le64 change_attr;
+       __le64 file_offset;
+       __le32 block_size;
+} __packed;
+
+struct ceph_fscrypt_auth {
+       __le32  cfa_version;
+       __le32  cfa_blob_len;
+       u8      cfa_blob[FSCRYPT_SET_CONTEXT_MAX_SIZE];
+} __packed;
+
+#define CEPH_FSCRYPT_AUTH_VERSION      1
+static inline u32 ceph_fscrypt_auth_len(struct ceph_fscrypt_auth *fa)
+{
+       u32 ctxsize = le32_to_cpu(fa->cfa_blob_len);
+
+       return offsetof(struct ceph_fscrypt_auth, cfa_blob) + ctxsize;
+}
+
+#ifdef CONFIG_FS_ENCRYPTION
+/*
+ * We want to encrypt filenames when creating them, but the encrypted
+ * versions of those names may have illegal characters in them. To mitigate
+ * that, we base64 encode them, but that gives us a result that can exceed
+ * NAME_MAX.
+ *
+ * Follow a similar scheme to fscrypt itself, and cap the filename to a
+ * smaller size. If the ciphertext name is longer than the value below, then
+ * sha256 hash the remaining bytes.
+ *
+ * For the fscrypt_nokey_name struct the dirhash[2] member is useless in ceph
+ * so the corresponding struct will be:
+ *
+ * struct fscrypt_ceph_nokey_name {
+ *     u8 bytes[148];
+ *     u8 sha256[SHA256_DIGEST_SIZE];
+ * }; // 180 bytes => 240 bytes base64-encoded, which is <= NAME_MAX (255)
+ *
+ * (240 bytes is the maximum size allowed for snapshot names to take into
+ *  account the format: '_<SNAPSHOT-NAME>_<INODE-NUMBER>'.)
+ *
+ * Note that for long names that end up having their tail portion hashed, we
+ * must also store the full encrypted name (in the dentry's alternate_name
+ * field).
+ */
+#define CEPH_NOHASH_NAME_MAX (180 - SHA256_DIGEST_SIZE)
+
+#define CEPH_BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
+
+int ceph_base64_encode(const u8 *src, int srclen, char *dst);
+int ceph_base64_decode(const char *src, int srclen, u8 *dst);
+
+void ceph_fscrypt_set_ops(struct super_block *sb);
+
+void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc);
+
+int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode,
+                                struct ceph_acl_sec_ctx *as);
+void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
+                               struct ceph_acl_sec_ctx *as);
+int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
+                               char *buf);
+int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry,
+                               char *buf);
+
+static inline int ceph_fname_alloc_buffer(struct inode *parent,
+                                         struct fscrypt_str *fname)
+{
+       if (!IS_ENCRYPTED(parent))
+               return 0;
+       return fscrypt_fname_alloc_buffer(NAME_MAX, fname);
+}
+
+static inline void ceph_fname_free_buffer(struct inode *parent,
+                                         struct fscrypt_str *fname)
+{
+       if (IS_ENCRYPTED(parent))
+               fscrypt_fname_free_buffer(fname);
+}
+
+int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
+                     struct fscrypt_str *oname, bool *is_nokey);
+int ceph_fscrypt_prepare_readdir(struct inode *dir);
+
+static inline unsigned int ceph_fscrypt_blocks(u64 off, u64 len)
+{
+       /* crypto blocks cannot span more than one page */
+       BUILD_BUG_ON(CEPH_FSCRYPT_BLOCK_SHIFT > PAGE_SHIFT);
+
+       return ((off+len+CEPH_FSCRYPT_BLOCK_SIZE-1) >> CEPH_FSCRYPT_BLOCK_SHIFT) -
+               (off >> CEPH_FSCRYPT_BLOCK_SHIFT);
+}
+
+/*
+ * If we have an encrypted inode then we must adjust the offset and
+ * range of the on-the-wire read to cover an entire encryption block.
+ * The copy will be done using the original offset and length, after
+ * we've decrypted the result.
+ */
+static inline void ceph_fscrypt_adjust_off_and_len(struct inode *inode,
+                                                  u64 *off, u64 *len)
+{
+       if (IS_ENCRYPTED(inode)) {
+               *len = ceph_fscrypt_blocks(*off, *len) * CEPH_FSCRYPT_BLOCK_SIZE;
+               *off &= CEPH_FSCRYPT_BLOCK_MASK;
+       }
+}
+
+int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode,
+                                 struct page *page, unsigned int len,
+                                 unsigned int offs, u64 lblk_num);
+int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode,
+                                 struct page *page, unsigned int len,
+                                 unsigned int offs, u64 lblk_num,
+                                 gfp_t gfp_flags);
+int ceph_fscrypt_decrypt_pages(struct inode *inode, struct page **page,
+                              u64 off, int len);
+int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page,
+                                u64 off, struct ceph_sparse_extent *map,
+                                u32 ext_cnt);
+int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **page, u64 off,
+                              int len, gfp_t gfp);
+
+static inline struct page *ceph_fscrypt_pagecache_page(struct page *page)
+{
+       return fscrypt_is_bounce_page(page) ? fscrypt_pagecache_page(page) : page;
+}
+
+#else /* CONFIG_FS_ENCRYPTION */
+
+static inline void ceph_fscrypt_set_ops(struct super_block *sb)
+{
+}
+
+static inline void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc)
+{
+}
+
+static inline int ceph_fscrypt_prepare_context(struct inode *dir,
+                                              struct inode *inode,
+                                              struct ceph_acl_sec_ctx *as)
+{
+       if (IS_ENCRYPTED(dir))
+               return -EOPNOTSUPP;
+       return 0;
+}
+
+static inline void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
+                                               struct ceph_acl_sec_ctx *as_ctx)
+{
+}
+
+static inline int ceph_encode_encrypted_dname(struct inode *parent,
+                                             struct qstr *d_name, char *buf)
+{
+       memcpy(buf, d_name->name, d_name->len);
+       return d_name->len;
+}
+
+static inline int ceph_encode_encrypted_fname(struct inode *parent,
+                                             struct dentry *dentry, char *buf)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int ceph_fname_alloc_buffer(struct inode *parent,
+                                         struct fscrypt_str *fname)
+{
+       return 0;
+}
+
+static inline void ceph_fname_free_buffer(struct inode *parent,
+                                         struct fscrypt_str *fname)
+{
+}
+
+static inline int ceph_fname_to_usr(const struct ceph_fname *fname,
+                                   struct fscrypt_str *tname,
+                                   struct fscrypt_str *oname, bool *is_nokey)
+{
+       oname->name = fname->name;
+       oname->len = fname->name_len;
+       return 0;
+}
+
+static inline int ceph_fscrypt_prepare_readdir(struct inode *dir)
+{
+       return 0;
+}
+
+static inline void ceph_fscrypt_adjust_off_and_len(struct inode *inode,
+                                                  u64 *off, u64 *len)
+{
+}
+
+static inline int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode,
+                                         struct page *page, unsigned int len,
+                                         unsigned int offs, u64 lblk_num)
+{
+       return 0;
+}
+
+static inline int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode,
+                                         struct page *page, unsigned int len,
+                                         unsigned int offs, u64 lblk_num,
+                                         gfp_t gfp_flags)
+{
+       return 0;
+}
+
+static inline int ceph_fscrypt_decrypt_pages(struct inode *inode,
+                                            struct page **page, u64 off,
+                                            int len)
+{
+       return 0;
+}
+
+static inline int ceph_fscrypt_decrypt_extents(struct inode *inode,
+                                              struct page **page, u64 off,
+                                              struct ceph_sparse_extent *map,
+                                              u32 ext_cnt)
+{
+       return 0;
+}
+
+static inline int ceph_fscrypt_encrypt_pages(struct inode *inode,
+                                            struct page **page, u64 off,
+                                            int len, gfp_t gfp)
+{
+       return 0;
+}
+
+static inline struct page *ceph_fscrypt_pagecache_page(struct page *page)
+{
+       return page;
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
+static inline loff_t ceph_fscrypt_page_offset(struct page *page)
+{
+       return page_offset(ceph_fscrypt_pagecache_page(page));
+}
+
+#endif /* _CEPH_CRYPTO_H */
index bdcffb0..854cbdd 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "crypto.h"
 
 /*
  * Directory operations: readdir, lookup, create, link, unlink,
@@ -241,7 +242,9 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
                di = ceph_dentry(dentry);
                if (d_unhashed(dentry) ||
                    d_really_is_negative(dentry) ||
-                   di->lease_shared_gen != shared_gen) {
+                   di->lease_shared_gen != shared_gen ||
+                   ((dentry->d_flags & DCACHE_NOKEY_NAME) &&
+                    fscrypt_has_encryption_key(dir))) {
                        spin_unlock(&dentry->d_lock);
                        dput(dentry);
                        err = -EAGAIN;
@@ -340,6 +343,10 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
                ctx->pos = 2;
        }
 
+       err = ceph_fscrypt_prepare_readdir(inode);
+       if (err < 0)
+               return err;
+
        spin_lock(&ci->i_ceph_lock);
        /* request Fx cap. if have Fx, we don't need to release Fs cap
         * for later create/unlink. */
@@ -389,6 +396,7 @@ more:
                req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
                if (IS_ERR(req))
                        return PTR_ERR(req);
+
                err = ceph_alloc_readdir_reply_buffer(req, inode);
                if (err) {
                        ceph_mdsc_put_request(req);
@@ -402,11 +410,21 @@ more:
                        req->r_inode_drop = CEPH_CAP_FILE_EXCL;
                }
                if (dfi->last_name) {
-                       req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
+                       struct qstr d_name = { .name = dfi->last_name,
+                                              .len = strlen(dfi->last_name) };
+
+                       req->r_path2 = kzalloc(NAME_MAX + 1, GFP_KERNEL);
                        if (!req->r_path2) {
                                ceph_mdsc_put_request(req);
                                return -ENOMEM;
                        }
+
+                       err = ceph_encode_encrypted_dname(inode, &d_name,
+                                                         req->r_path2);
+                       if (err < 0) {
+                               ceph_mdsc_put_request(req);
+                               return err;
+                       }
                } else if (is_hash_order(ctx->pos)) {
                        req->r_args.readdir.offset_hash =
                                cpu_to_le32(fpos_hash(ctx->pos));
@@ -511,15 +529,20 @@ more:
        for (; i < rinfo->dir_nr; i++) {
                struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
 
-               BUG_ON(rde->offset < ctx->pos);
+               if (rde->offset < ctx->pos) {
+                       pr_warn("%s: rde->offset 0x%llx ctx->pos 0x%llx\n",
+                               __func__, rde->offset, ctx->pos);
+                       return -EIO;
+               }
+
+               if (WARN_ON_ONCE(!rde->inode.in))
+                       return -EIO;
 
                ctx->pos = rde->offset;
                dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
                     i, rinfo->dir_nr, ctx->pos,
                     rde->name_len, rde->name, &rde->inode.in);
 
-               BUG_ON(!rde->inode.in);
-
                if (!dir_emit(ctx, rde->name, rde->name_len,
                              ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
                              le32_to_cpu(rde->inode.in->mode) >> 12)) {
@@ -532,6 +555,8 @@ more:
                        dout("filldir stopping us...\n");
                        return 0;
                }
+
+               /* Reset the lengths to their original allocated vals */
                ctx->pos++;
        }
 
@@ -586,7 +611,6 @@ more:
                                        dfi->dir_ordered_count);
                spin_unlock(&ci->i_ceph_lock);
        }
-
        dout("readdir %p file %p done.\n", inode, file);
        return 0;
 }
@@ -760,6 +784,18 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
        if (dentry->d_name.len > NAME_MAX)
                return ERR_PTR(-ENAMETOOLONG);
 
+       if (IS_ENCRYPTED(dir)) {
+               bool had_key = fscrypt_has_encryption_key(dir);
+
+               err = fscrypt_prepare_lookup_partial(dir, dentry);
+               if (err < 0)
+                       return ERR_PTR(err);
+
+               /* mark directory as incomplete if it has been unlocked */
+               if (!had_key && fscrypt_has_encryption_key(dir))
+                       ceph_dir_clear_complete(dir);
+       }
+
        /* can we conclude ENOENT locally? */
        if (d_really_is_negative(dentry)) {
                struct ceph_inode_info *ci = ceph_inode(dir);
@@ -865,13 +901,6 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
                goto out;
        }
 
-       err = ceph_pre_init_acls(dir, &mode, &as_ctx);
-       if (err < 0)
-               goto out;
-       err = ceph_security_init_secctx(dentry, mode, &as_ctx);
-       if (err < 0)
-               goto out;
-
        dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
             dir, dentry, mode, rdev);
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
@@ -879,6 +908,17 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
                err = PTR_ERR(req);
                goto out;
        }
+
+       req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+       if (IS_ERR(req->r_new_inode)) {
+               err = PTR_ERR(req->r_new_inode);
+               req->r_new_inode = NULL;
+               goto out_req;
+       }
+
+       if (S_ISREG(mode) && IS_ENCRYPTED(dir))
+               set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        req->r_parent = dir;
@@ -889,13 +929,13 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
                             CEPH_CAP_XATTR_EXCL;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-       if (as_ctx.pagelist) {
-               req->r_pagelist = as_ctx.pagelist;
-               as_ctx.pagelist = NULL;
-       }
+
+       ceph_as_ctx_to_req(req, &as_ctx);
+
        err = ceph_mdsc_do_request(mdsc, dir, req);
        if (!err && !req->r_reply_info.head->is_dentry)
                err = ceph_handle_notrace_create(dir, dentry);
+out_req:
        ceph_mdsc_put_request(req);
 out:
        if (!err)
@@ -912,12 +952,50 @@ static int ceph_create(struct mnt_idmap *idmap, struct inode *dir,
        return ceph_mknod(idmap, dir, dentry, mode, 0);
 }
 
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
+                                        const char *dest)
+{
+       int err;
+       int len = strlen(dest);
+       struct fscrypt_str osd_link = FSTR_INIT(NULL, 0);
+
+       err = fscrypt_prepare_symlink(req->r_parent, dest, len, PATH_MAX,
+                                     &osd_link);
+       if (err)
+               goto out;
+
+       err = fscrypt_encrypt_symlink(req->r_new_inode, dest, len, &osd_link);
+       if (err)
+               goto out;
+
+       req->r_path2 = kmalloc(CEPH_BASE64_CHARS(osd_link.len) + 1, GFP_KERNEL);
+       if (!req->r_path2) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       len = ceph_base64_encode(osd_link.name, osd_link.len, req->r_path2);
+       req->r_path2[len] = '\0';
+out:
+       fscrypt_fname_free_buffer(&osd_link);
+       return err;
+}
+#else
+static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
+                                        const char *dest)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
 static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
                        struct dentry *dentry, const char *dest)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
        struct ceph_mds_request *req;
        struct ceph_acl_sec_ctx as_ctx = {};
+       umode_t mode = S_IFLNK | 0777;
        int err;
 
        if (ceph_snap(dir) != CEPH_NOSNAP)
@@ -932,38 +1010,48 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
                goto out;
        }
 
-       err = ceph_security_init_secctx(dentry, S_IFLNK | 0777, &as_ctx);
-       if (err < 0)
-               goto out;
-
        dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
        if (IS_ERR(req)) {
                err = PTR_ERR(req);
                goto out;
        }
-       req->r_path2 = kstrdup(dest, GFP_KERNEL);
-       if (!req->r_path2) {
-               err = -ENOMEM;
-               ceph_mdsc_put_request(req);
-               goto out;
+
+       req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+       if (IS_ERR(req->r_new_inode)) {
+               err = PTR_ERR(req->r_new_inode);
+               req->r_new_inode = NULL;
+               goto out_req;
        }
+
        req->r_parent = dir;
        ihold(dir);
 
+       if (IS_ENCRYPTED(req->r_new_inode)) {
+               err = prep_encrypted_symlink_target(req, dest);
+               if (err)
+                       goto out_req;
+       } else {
+               req->r_path2 = kstrdup(dest, GFP_KERNEL);
+               if (!req->r_path2) {
+                       err = -ENOMEM;
+                       goto out_req;
+               }
+       }
+
        set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
                             CEPH_CAP_XATTR_EXCL;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-       if (as_ctx.pagelist) {
-               req->r_pagelist = as_ctx.pagelist;
-               as_ctx.pagelist = NULL;
-       }
+
+       ceph_as_ctx_to_req(req, &as_ctx);
+
        err = ceph_mdsc_do_request(mdsc, dir, req);
        if (!err && !req->r_reply_info.head->is_dentry)
                err = ceph_handle_notrace_create(dir, dentry);
+out_req:
        ceph_mdsc_put_request(req);
 out:
        if (err)
@@ -1003,14 +1091,12 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
                err = -EDQUOT;
                goto out;
        }
-
-       mode |= S_IFDIR;
-       err = ceph_pre_init_acls(dir, &mode, &as_ctx);
-       if (err < 0)
-               goto out;
-       err = ceph_security_init_secctx(dentry, mode, &as_ctx);
-       if (err < 0)
+       if ((op == CEPH_MDS_OP_MKSNAP) && IS_ENCRYPTED(dir) &&
+           !fscrypt_has_encryption_key(dir)) {
+               err = -ENOKEY;
                goto out;
+       }
+
 
        req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
        if (IS_ERR(req)) {
@@ -1018,6 +1104,14 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
                goto out;
        }
 
+       mode |= S_IFDIR;
+       req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+       if (IS_ERR(req->r_new_inode)) {
+               err = PTR_ERR(req->r_new_inode);
+               req->r_new_inode = NULL;
+               goto out_req;
+       }
+
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        req->r_parent = dir;
@@ -1027,15 +1121,15 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
                             CEPH_CAP_XATTR_EXCL;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-       if (as_ctx.pagelist) {
-               req->r_pagelist = as_ctx.pagelist;
-               as_ctx.pagelist = NULL;
-       }
+
+       ceph_as_ctx_to_req(req, &as_ctx);
+
        err = ceph_mdsc_do_request(mdsc, dir, req);
        if (!err &&
            !req->r_reply_info.head->is_target &&
            !req->r_reply_info.head->is_dentry)
                err = ceph_handle_notrace_create(dir, dentry);
+out_req:
        ceph_mdsc_put_request(req);
 out:
        if (!err)
@@ -1063,6 +1157,10 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
        if (ceph_snap(dir) != CEPH_NOSNAP)
                return -EROFS;
 
+       err = fscrypt_prepare_link(old_dentry, dir, dentry);
+       if (err)
+               return err;
+
        dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n",
             dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry);
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
@@ -1310,6 +1408,11 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
        if (err)
                return err;
 
+       err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
+                                    flags);
+       if (err)
+               return err;
+
        dout("rename dir %p dentry %p to dir %p dentry %p\n",
             old_dir, old_dentry, new_dir, new_dentry);
        req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1765,6 +1868,10 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
        struct inode *dir, *inode;
        struct ceph_mds_client *mdsc;
 
+       valid = fscrypt_d_revalidate(dentry, flags);
+       if (valid <= 0)
+               return valid;
+
        if (flags & LOOKUP_RCU) {
                parent = READ_ONCE(dentry->d_parent);
                dir = d_inode_rcu(parent);
@@ -1777,8 +1884,9 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
                inode = d_inode(dentry);
        }
 
-       dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry,
-            dentry, inode, ceph_dentry(dentry)->offset);
+       dout("d_revalidate %p '%pd' inode %p offset 0x%llx nokey %d\n", dentry,
+            dentry, inode, ceph_dentry(dentry)->offset,
+            !!(dentry->d_flags & DCACHE_NOKEY_NAME));
 
        mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
 
index f780e4e..8559990 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "crypto.h"
 
 /*
  * Basic fh
@@ -535,7 +536,9 @@ static int ceph_get_name(struct dentry *parent, char *name,
 {
        struct ceph_mds_client *mdsc;
        struct ceph_mds_request *req;
+       struct inode *dir = d_inode(parent);
        struct inode *inode = d_inode(child);
+       struct ceph_mds_reply_info_parsed *rinfo;
        int err;
 
        if (ceph_snap(inode) != CEPH_NOSNAP)
@@ -547,30 +550,47 @@ static int ceph_get_name(struct dentry *parent, char *name,
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       inode_lock(d_inode(parent));
-
+       inode_lock(dir);
        req->r_inode = inode;
        ihold(inode);
        req->r_ino2 = ceph_vino(d_inode(parent));
-       req->r_parent = d_inode(parent);
-       ihold(req->r_parent);
+       req->r_parent = dir;
+       ihold(dir);
        set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
+       inode_unlock(dir);
 
-       inode_unlock(d_inode(parent));
+       if (err)
+               goto out;
 
-       if (!err) {
-               struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+       rinfo = &req->r_reply_info;
+       if (!IS_ENCRYPTED(dir)) {
                memcpy(name, rinfo->dname, rinfo->dname_len);
                name[rinfo->dname_len] = 0;
-               dout("get_name %p ino %llx.%llx name %s\n",
-                    child, ceph_vinop(inode), name);
        } else {
-               dout("get_name %p ino %llx.%llx err %d\n",
-                    child, ceph_vinop(inode), err);
-       }
+               struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+               struct ceph_fname fname = { .dir        = dir,
+                                           .name       = rinfo->dname,
+                                           .ctext      = rinfo->altname,
+                                           .name_len   = rinfo->dname_len,
+                                           .ctext_len  = rinfo->altname_len };
+
+               err = ceph_fname_alloc_buffer(dir, &oname);
+               if (err < 0)
+                       goto out;
 
+               err = ceph_fname_to_usr(&fname, NULL, &oname, NULL);
+               if (!err) {
+                       memcpy(name, oname.name, oname.len);
+                       name[oname.len] = 0;
+               }
+               ceph_fname_free_buffer(dir, &oname);
+       }
+out:
+       dout("get_name %p ino %llx.%llx err %d %s%s\n",
+                    child, ceph_vinop(inode), err,
+                    err ? "" : "name ", err ? "" : name);
        ceph_mdsc_put_request(req);
        return err;
 }
index 63efe53..b1da02f 100644 (file)
@@ -366,8 +366,13 @@ int ceph_open(struct inode *inode, struct file *file)
 
        /* filter out O_CREAT|O_EXCL; vfs did that already.  yuck. */
        flags = file->f_flags & ~(O_CREAT|O_EXCL);
-       if (S_ISDIR(inode->i_mode))
+       if (S_ISDIR(inode->i_mode)) {
                flags = O_DIRECTORY;  /* mds likes to know */
+       } else if (S_ISREG(inode->i_mode)) {
+               err = fscrypt_file_open(inode, file);
+               if (err)
+                       return err;
+       }
 
        dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode,
             ceph_vinop(inode), file, flags, file->f_flags);
@@ -604,7 +609,8 @@ out:
        ceph_mdsc_release_dir_caps(req);
 }
 
-static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
+static int ceph_finish_async_create(struct inode *dir, struct inode *inode,
+                                   struct dentry *dentry,
                                    struct file *file, umode_t mode,
                                    struct ceph_mds_request *req,
                                    struct ceph_acl_sec_ctx *as_ctx,
@@ -616,7 +622,6 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
        struct ceph_mds_reply_info_in iinfo = { .in = &in };
        struct ceph_inode_info *ci = ceph_inode(dir);
        struct ceph_dentry_info *di = ceph_dentry(dentry);
-       struct inode *inode;
        struct timespec64 now;
        struct ceph_string *pool_ns;
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
@@ -625,10 +630,6 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
 
        ktime_get_real_ts64(&now);
 
-       inode = ceph_get_inode(dentry->d_sb, vino);
-       if (IS_ERR(inode))
-               return PTR_ERR(inode);
-
        iinfo.inline_version = CEPH_INLINE_NONE;
        iinfo.change_attr = 1;
        ceph_encode_timespec64(&iinfo.btime, &now);
@@ -686,8 +687,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
                ceph_dir_clear_complete(dir);
                if (!d_unhashed(dentry))
                        d_drop(dentry);
-               if (inode->i_state & I_NEW)
-                       discard_new_inode(inode);
+               discard_new_inode(inode);
        } else {
                struct dentry *dn;
 
@@ -733,6 +733,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
        struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
+       struct inode *new_inode = NULL;
        struct dentry *dn;
        struct ceph_acl_sec_ctx as_ctx = {};
        bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
@@ -755,15 +756,16 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
         */
        flags &= ~O_TRUNC;
 
+retry:
        if (flags & O_CREAT) {
                if (ceph_quota_is_max_files_exceeded(dir))
                        return -EDQUOT;
-               err = ceph_pre_init_acls(dir, &mode, &as_ctx);
-               if (err < 0)
-                       return err;
-               err = ceph_security_init_secctx(dentry, mode, &as_ctx);
-               if (err < 0)
+
+               new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+               if (IS_ERR(new_inode)) {
+                       err = PTR_ERR(new_inode);
                        goto out_ctx;
+               }
                /* Async create can't handle more than a page of xattrs */
                if (as_ctx.pagelist &&
                    !list_is_singular(&as_ctx.pagelist->head))
@@ -772,7 +774,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                /* If it's not being looked up, it's negative */
                return -ENOENT;
        }
-retry:
+
        /* do the open */
        req = prepare_open_request(dir->i_sb, flags, mode);
        if (IS_ERR(req)) {
@@ -787,6 +789,12 @@ retry:
        req->r_args.open.mask = cpu_to_le32(mask);
        req->r_parent = dir;
        ihold(dir);
+       if (IS_ENCRYPTED(dir)) {
+               set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+               err = fscrypt_prepare_lookup_partial(dir, dentry);
+               if (err < 0)
+                       goto out_req;
+       }
 
        if (flags & O_CREAT) {
                struct ceph_file_layout lo;
@@ -794,32 +802,47 @@ retry:
                req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
                                     CEPH_CAP_XATTR_EXCL;
                req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-               if (as_ctx.pagelist) {
-                       req->r_pagelist = as_ctx.pagelist;
-                       as_ctx.pagelist = NULL;
-               }
-               if (try_async &&
-                   (req->r_dir_caps =
-                     try_prep_async_create(dir, dentry, &lo,
-                                           &req->r_deleg_ino))) {
+
+               ceph_as_ctx_to_req(req, &as_ctx);
+
+               if (try_async && (req->r_dir_caps =
+                                 try_prep_async_create(dir, dentry, &lo,
+                                                       &req->r_deleg_ino))) {
+                       struct ceph_vino vino = { .ino = req->r_deleg_ino,
+                                                 .snap = CEPH_NOSNAP };
                        struct ceph_dentry_info *di = ceph_dentry(dentry);
 
                        set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
                        req->r_args.open.flags |= cpu_to_le32(CEPH_O_EXCL);
                        req->r_callback = ceph_async_create_cb;
 
+                       /* Hash inode before RPC */
+                       new_inode = ceph_get_inode(dir->i_sb, vino, new_inode);
+                       if (IS_ERR(new_inode)) {
+                               err = PTR_ERR(new_inode);
+                               new_inode = NULL;
+                               goto out_req;
+                       }
+                       WARN_ON_ONCE(!(new_inode->i_state & I_NEW));
+
                        spin_lock(&dentry->d_lock);
                        di->flags |= CEPH_DENTRY_ASYNC_CREATE;
                        spin_unlock(&dentry->d_lock);
 
                        err = ceph_mdsc_submit_request(mdsc, dir, req);
                        if (!err) {
-                               err = ceph_finish_async_create(dir, dentry,
-                                                       file, mode, req,
-                                                       &as_ctx, &lo);
+                               err = ceph_finish_async_create(dir, new_inode,
+                                                              dentry, file,
+                                                              mode, req,
+                                                              &as_ctx, &lo);
+                               new_inode = NULL;
                        } else if (err == -EJUKEBOX) {
                                restore_deleg_ino(dir, req->r_deleg_ino);
                                ceph_mdsc_put_request(req);
+                               discard_new_inode(new_inode);
+                               ceph_release_acl_sec_ctx(&as_ctx);
+                               memset(&as_ctx, 0, sizeof(as_ctx));
+                               new_inode = NULL;
                                try_async = false;
                                ceph_put_string(rcu_dereference_raw(lo.pool_ns));
                                goto retry;
@@ -830,6 +853,8 @@ retry:
        }
 
        set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
+       req->r_new_inode = new_inode;
+       new_inode = NULL;
        err = ceph_mdsc_do_request(mdsc, (flags & O_CREAT) ? dir : NULL, req);
        if (err == -ENOENT) {
                dentry = ceph_handle_snapdir(req, dentry);
@@ -858,6 +883,13 @@ retry:
                dout("atomic_open finish_no_open on dn %p\n", dn);
                err = finish_no_open(file, dn);
        } else {
+               if (IS_ENCRYPTED(dir) &&
+                   !fscrypt_has_permitted_context(dir, d_inode(dentry))) {
+                       pr_warn("Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n",
+                               ceph_vinop(dir), ceph_vinop(d_inode(dentry)));
+                       goto out_req;
+               }
+
                dout("atomic_open finish_open on dn %p\n", dn);
                if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
                        struct inode *newino = d_inode(dentry);
@@ -870,6 +902,7 @@ retry:
        }
 out_req:
        ceph_mdsc_put_request(req);
+       iput(new_inode);
 out_ctx:
        ceph_release_acl_sec_ctx(&as_ctx);
        dout("atomic_open result=%d\n", err);
@@ -924,21 +957,24 @@ enum {
  * If we get a short result from the OSD, check against i_size; we need to
  * only return a short read to the caller if we hit EOF.
  */
-static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
-                             int *retry_op)
+ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+                        struct iov_iter *to, int *retry_op,
+                        u64 *last_objver)
 {
-       struct file *file = iocb->ki_filp;
-       struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_osd_client *osdc = &fsc->client->osdc;
        ssize_t ret;
-       u64 off = iocb->ki_pos;
+       u64 off = *ki_pos;
        u64 len = iov_iter_count(to);
        u64 i_size = i_size_read(inode);
+       bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
+       u64 objver = 0;
 
-       dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
-            (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
+       dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len);
+
+       if (ceph_inode_is_shutdown(inode))
+               return -EIO;
 
        if (!len)
                return 0;
@@ -962,10 +998,21 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                bool more;
                int idx;
                size_t left;
+               struct ceph_osd_req_op *op;
+               u64 read_off = off;
+               u64 read_len = len;
+
+               /* determine new offset/length if encrypted */
+               ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
+
+               dout("sync_read orig %llu~%llu reading %llu~%llu",
+                    off, len, read_off, read_len);
 
                req = ceph_osdc_new_request(osdc, &ci->i_layout,
-                                       ci->i_vino, off, &len, 0, 1,
-                                       CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
+                                       ci->i_vino, read_off, &read_len, 0, 1,
+                                       sparse ? CEPH_OSD_OP_SPARSE_READ :
+                                                CEPH_OSD_OP_READ,
+                                       CEPH_OSD_FLAG_READ,
                                        NULL, ci->i_truncate_seq,
                                        ci->i_truncate_size, false);
                if (IS_ERR(req)) {
@@ -973,10 +1020,13 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                        break;
                }
 
+               /* adjust len downward if the request truncated the len */
+               if (off + len > read_off + read_len)
+                       len = read_off + read_len - off;
                more = len < iov_iter_count(to);
 
-               num_pages = calc_pages_for(off, len);
-               page_off = off & ~PAGE_MASK;
+               num_pages = calc_pages_for(read_off, read_len);
+               page_off = offset_in_page(off);
                pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
                if (IS_ERR(pages)) {
                        ceph_osdc_put_request(req);
@@ -984,29 +1034,75 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                        break;
                }
 
-               osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
+               osd_req_op_extent_osd_data_pages(req, 0, pages, read_len,
+                                                offset_in_page(read_off),
                                                 false, false);
+
+               op = &req->r_ops[0];
+               if (sparse) {
+                       ret = ceph_alloc_sparse_ext_map(op);
+                       if (ret) {
+                               ceph_osdc_put_request(req);
+                               break;
+                       }
+               }
+
                ceph_osdc_start_request(osdc, req);
                ret = ceph_osdc_wait_request(osdc, req);
 
                ceph_update_read_metrics(&fsc->mdsc->metric,
                                         req->r_start_latency,
                                         req->r_end_latency,
-                                        len, ret);
+                                        read_len, ret);
 
-               ceph_osdc_put_request(req);
+               if (ret > 0)
+                       objver = req->r_version;
 
                i_size = i_size_read(inode);
                dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
                     off, len, ret, i_size, (more ? " MORE" : ""));
 
-               if (ret == -ENOENT)
+               /* Fix it to go to end of extent map */
+               if (sparse && ret >= 0)
+                       ret = ceph_sparse_ext_map_end(op);
+               else if (ret == -ENOENT)
                        ret = 0;
+
+               if (ret > 0 && IS_ENCRYPTED(inode)) {
+                       int fret;
+
+                       fret = ceph_fscrypt_decrypt_extents(inode, pages,
+                                       read_off, op->extent.sparse_ext,
+                                       op->extent.sparse_ext_cnt);
+                       if (fret < 0) {
+                               ret = fret;
+                               ceph_osdc_put_request(req);
+                               break;
+                       }
+
+                       /* account for any partial block at the beginning */
+                       fret -= (off - read_off);
+
+                       /*
+                        * Short read after big offset adjustment?
+                        * Nothing is usable, just call it a zero
+                        * len read.
+                        */
+                       fret = max(fret, 0);
+
+                       /* account for partial block at the end */
+                       ret = min_t(ssize_t, fret, len);
+               }
+
+               ceph_osdc_put_request(req);
+
+               /* Short read but not EOF? Zero out the remainder. */
                if (ret >= 0 && ret < len && (off + ret < i_size)) {
                        int zlen = min(len - ret, i_size - off - ret);
                        int zoff = page_off + ret;
+
                        dout("sync_read zero gap %llu~%llu\n",
-                             off + ret, off + ret + zlen);
+                               off + ret, off + ret + zlen);
                        ceph_zero_page_vector_range(zoff, zlen, pages);
                        ret += zlen;
                }
@@ -1014,15 +1110,16 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                idx = 0;
                left = ret > 0 ? ret : 0;
                while (left > 0) {
-                       size_t len, copied;
-                       page_off = off & ~PAGE_MASK;
-                       len = min_t(size_t, left, PAGE_SIZE - page_off);
+                       size_t plen, copied;
+
+                       plen = min_t(size_t, left, PAGE_SIZE - page_off);
                        SetPageUptodate(pages[idx]);
                        copied = copy_page_to_iter(pages[idx++],
-                                                  page_off, len, to);
+                                                  page_off, plen, to);
                        off += copied;
                        left -= copied;
-                       if (copied < len) {
+                       page_off = 0;
+                       if (copied < plen) {
                                ret = -EFAULT;
                                break;
                        }
@@ -1039,21 +1136,37 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                        break;
        }
 
-       if (off > iocb->ki_pos) {
-               if (off >= i_size) {
-                       *retry_op = CHECK_EOF;
-                       ret = i_size - iocb->ki_pos;
-                       iocb->ki_pos = i_size;
-               } else {
-                       ret = off - iocb->ki_pos;
-                       iocb->ki_pos = off;
+       if (ret > 0) {
+               if (off > *ki_pos) {
+                       if (off >= i_size) {
+                               *retry_op = CHECK_EOF;
+                               ret = i_size - *ki_pos;
+                               *ki_pos = i_size;
+                       } else {
+                               ret = off - *ki_pos;
+                               *ki_pos = off;
+                       }
                }
-       }
 
+               if (last_objver)
+                       *last_objver = objver;
+       }
        dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
        return ret;
 }
 
+static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
+                             int *retry_op)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+
+       dout("sync_read on file %p %llx~%zx %s\n", file, iocb->ki_pos,
+            iov_iter_count(to), (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
+
+       return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL);
+}
+
 struct ceph_aio_request {
        struct kiocb *iocb;
        size_t total_len;
@@ -1125,8 +1238,10 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
        struct inode *inode = req->r_inode;
        struct ceph_aio_request *aio_req = req->r_priv;
        struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+       struct ceph_osd_req_op *op = &req->r_ops[0];
        struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric;
        unsigned int len = osd_data->bvec_pos.iter.bi_size;
+       bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ);
 
        BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
        BUG_ON(!osd_data->num_bvecs);
@@ -1147,6 +1262,8 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
                }
                rc = -ENOMEM;
        } else if (!aio_req->write) {
+               if (sparse && rc >= 0)
+                       rc = ceph_sparse_ext_map_end(op);
                if (rc == -ENOENT)
                        rc = 0;
                if (rc >= 0 && len > rc) {
@@ -1283,6 +1400,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
        loff_t pos = iocb->ki_pos;
        bool write = iov_iter_rw(iter) == WRITE;
        bool should_dirty = !write && user_backed_iter(iter);
+       bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD);
 
        if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
                return -EROFS;
@@ -1310,6 +1428,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
        while (iov_iter_count(iter) > 0) {
                u64 size = iov_iter_count(iter);
                ssize_t len;
+               struct ceph_osd_req_op *op;
+               int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ;
 
                if (write)
                        size = min_t(u64, size, fsc->mount_options->wsize);
@@ -1320,8 +1440,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                            vino, pos, &size, 0,
                                            1,
-                                           write ? CEPH_OSD_OP_WRITE :
-                                                   CEPH_OSD_OP_READ,
+                                           write ? CEPH_OSD_OP_WRITE : readop,
                                            flags, snapc,
                                            ci->i_truncate_seq,
                                            ci->i_truncate_size,
@@ -1372,6 +1491,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                }
 
                osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
+               op = &req->r_ops[0];
+               if (sparse) {
+                       ret = ceph_alloc_sparse_ext_map(op);
+                       if (ret) {
+                               ceph_osdc_put_request(req);
+                               break;
+                       }
+               }
 
                if (aio_req) {
                        aio_req->total_len += len;
@@ -1399,8 +1526,11 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 
                size = i_size_read(inode);
                if (!write) {
-                       if (ret == -ENOENT)
+                       if (sparse && ret >= 0)
+                               ret = ceph_sparse_ext_map_end(op);
+                       else if (ret == -ENOENT)
                                ret = 0;
+
                        if (ret >= 0 && ret < len && pos + ret < size) {
                                struct iov_iter i;
                                int zlen = min_t(size_t, len - ret,
@@ -1481,13 +1611,12 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-       struct ceph_vino vino;
+       struct ceph_osd_client *osdc = &fsc->client->osdc;
        struct ceph_osd_request *req;
        struct page **pages;
        u64 len;
        int num_pages;
        int written = 0;
-       int flags;
        int ret;
        bool check_caps = false;
        struct timespec64 mtime = current_time(inode);
@@ -1505,79 +1634,350 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                return ret;
 
        ceph_fscache_invalidate(inode, false);
-       ret = invalidate_inode_pages2_range(inode->i_mapping,
-                                           pos >> PAGE_SHIFT,
-                                           (pos + count - 1) >> PAGE_SHIFT);
-       if (ret < 0)
-               dout("invalidate_inode_pages2_range returned %d\n", ret);
-
-       flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE;
 
        while ((len = iov_iter_count(from)) > 0) {
                size_t left;
                int n;
-
-               vino = ceph_vino(inode);
-               req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-                                           vino, pos, &len, 0, 1,
-                                           CEPH_OSD_OP_WRITE, flags, snapc,
-                                           ci->i_truncate_seq,
-                                           ci->i_truncate_size,
-                                           false);
-               if (IS_ERR(req)) {
-                       ret = PTR_ERR(req);
-                       break;
-               }
+               u64 write_pos = pos;
+               u64 write_len = len;
+               u64 objnum, objoff;
+               u32 xlen;
+               u64 assert_ver = 0;
+               bool rmw;
+               bool first, last;
+               struct iov_iter saved_iter = *from;
+               size_t off;
+
+               ceph_fscrypt_adjust_off_and_len(inode, &write_pos, &write_len);
+
+               /* clamp the length to the end of first object */
+               ceph_calc_file_object_mapping(&ci->i_layout, write_pos,
+                                             write_len, &objnum, &objoff,
+                                             &xlen);
+               write_len = xlen;
+
+               /* adjust len downward if it goes beyond current object */
+               if (pos + len > write_pos + write_len)
+                       len = write_pos + write_len - pos;
 
                /*
-                * write from beginning of first page,
-                * regardless of io alignment
+                * If we had to adjust the length or position to align with a
+                * crypto block, then we must do a read/modify/write cycle. We
+                * use a version assertion to redrive the thing if something
+                * changes in between.
                 */
-               num_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+               first = pos != write_pos;
+               last = (pos + len) != (write_pos + write_len);
+               rmw = first || last;
+
+               dout("sync_write ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n",
+                    ci->i_vino.ino, pos, len, write_pos, write_len,
+                    rmw ? "" : "no ");
 
+               /*
+                * The data is emplaced into the page as it would be if it were
+                * in an array of pagecache pages.
+                */
+               num_pages = calc_pages_for(write_pos, write_len);
                pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
                if (IS_ERR(pages)) {
                        ret = PTR_ERR(pages);
-                       goto out;
+                       break;
+               }
+
+               /* Do we need to preload the pages? */
+               if (rmw) {
+                       u64 first_pos = write_pos;
+                       u64 last_pos = (write_pos + write_len) - CEPH_FSCRYPT_BLOCK_SIZE;
+                       u64 read_len = CEPH_FSCRYPT_BLOCK_SIZE;
+                       struct ceph_osd_req_op *op;
+
+                       /* We should only need to do this for encrypted inodes */
+                       WARN_ON_ONCE(!IS_ENCRYPTED(inode));
+
+                       /* No need to do two reads if first and last blocks are same */
+                       if (first && last_pos == first_pos)
+                               last = false;
+
+                       /*
+                        * Allocate a read request for one or two extents,
+                        * depending on how the request was aligned.
+                        */
+                       req = ceph_osdc_new_request(osdc, &ci->i_layout,
+                                       ci->i_vino, first ? first_pos : last_pos,
+                                       &read_len, 0, (first && last) ? 2 : 1,
+                                       CEPH_OSD_OP_SPARSE_READ, CEPH_OSD_FLAG_READ,
+                                       NULL, ci->i_truncate_seq,
+                                       ci->i_truncate_size, false);
+                       if (IS_ERR(req)) {
+                               ceph_release_page_vector(pages, num_pages);
+                               ret = PTR_ERR(req);
+                               break;
+                       }
+
+                       /* Something is misaligned! */
+                       if (read_len != CEPH_FSCRYPT_BLOCK_SIZE) {
+                               ceph_osdc_put_request(req);
+                               ceph_release_page_vector(pages, num_pages);
+                               ret = -EIO;
+                               break;
+                       }
+
+                       /* Add extent for first block? */
+                       op = &req->r_ops[0];
+
+                       if (first) {
+                               osd_req_op_extent_osd_data_pages(req, 0, pages,
+                                                        CEPH_FSCRYPT_BLOCK_SIZE,
+                                                        offset_in_page(first_pos),
+                                                        false, false);
+                               /* We only expect a single extent here */
+                               ret = __ceph_alloc_sparse_ext_map(op, 1);
+                               if (ret) {
+                                       ceph_osdc_put_request(req);
+                                       ceph_release_page_vector(pages, num_pages);
+                                       break;
+                               }
+                       }
+
+                       /* Add extent for last block */
+                       if (last) {
+                               /* Init the other extent if first extent has been used */
+                               if (first) {
+                                       op = &req->r_ops[1];
+                                       osd_req_op_extent_init(req, 1,
+                                                       CEPH_OSD_OP_SPARSE_READ,
+                                                       last_pos, CEPH_FSCRYPT_BLOCK_SIZE,
+                                                       ci->i_truncate_size,
+                                                       ci->i_truncate_seq);
+                               }
+
+                               ret = __ceph_alloc_sparse_ext_map(op, 1);
+                               if (ret) {
+                                       ceph_osdc_put_request(req);
+                                       ceph_release_page_vector(pages, num_pages);
+                                       break;
+                               }
+
+                               osd_req_op_extent_osd_data_pages(req, first ? 1 : 0,
+                                                       &pages[num_pages - 1],
+                                                       CEPH_FSCRYPT_BLOCK_SIZE,
+                                                       offset_in_page(last_pos),
+                                                       false, false);
+                       }
+
+                       ceph_osdc_start_request(osdc, req);
+                       ret = ceph_osdc_wait_request(osdc, req);
+
+                       /* FIXME: length field is wrong if there are 2 extents */
+                       ceph_update_read_metrics(&fsc->mdsc->metric,
+                                                req->r_start_latency,
+                                                req->r_end_latency,
+                                                read_len, ret);
+
+                       /* Ok if object is not already present */
+                       if (ret == -ENOENT) {
+                               /*
+                                * If there is no object, then we can't assert
+                                * on its version. Set it to 0, and we'll use an
+                                * exclusive create instead.
+                                */
+                               ceph_osdc_put_request(req);
+                               ret = 0;
+
+                               /*
+                                * zero out the soon-to-be uncopied parts of the
+                                * first and last pages.
+                                */
+                               if (first)
+                                       zero_user_segment(pages[0], 0,
+                                                         offset_in_page(first_pos));
+                               if (last)
+                                       zero_user_segment(pages[num_pages - 1],
+                                                         offset_in_page(last_pos),
+                                                         PAGE_SIZE);
+                       } else {
+                               if (ret < 0) {
+                                       ceph_osdc_put_request(req);
+                                       ceph_release_page_vector(pages, num_pages);
+                                       break;
+                               }
+
+                               op = &req->r_ops[0];
+                               if (op->extent.sparse_ext_cnt == 0) {
+                                       if (first)
+                                               zero_user_segment(pages[0], 0,
+                                                                 offset_in_page(first_pos));
+                                       else
+                                               zero_user_segment(pages[num_pages - 1],
+                                                                 offset_in_page(last_pos),
+                                                                 PAGE_SIZE);
+                               } else if (op->extent.sparse_ext_cnt != 1 ||
+                                          ceph_sparse_ext_map_end(op) !=
+                                               CEPH_FSCRYPT_BLOCK_SIZE) {
+                                       ret = -EIO;
+                                       ceph_osdc_put_request(req);
+                                       ceph_release_page_vector(pages, num_pages);
+                                       break;
+                               }
+
+                               if (first && last) {
+                                       op = &req->r_ops[1];
+                                       if (op->extent.sparse_ext_cnt == 0) {
+                                               zero_user_segment(pages[num_pages - 1],
+                                                                 offset_in_page(last_pos),
+                                                                 PAGE_SIZE);
+                                       } else if (op->extent.sparse_ext_cnt != 1 ||
+                                                  ceph_sparse_ext_map_end(op) !=
+                                                       CEPH_FSCRYPT_BLOCK_SIZE) {
+                                               ret = -EIO;
+                                               ceph_osdc_put_request(req);
+                                               ceph_release_page_vector(pages, num_pages);
+                                               break;
+                                       }
+                               }
+
+                               /* Grab assert version. It must be non-zero. */
+                               assert_ver = req->r_version;
+                               WARN_ON_ONCE(ret > 0 && assert_ver == 0);
+
+                               ceph_osdc_put_request(req);
+                               if (first) {
+                                       ret = ceph_fscrypt_decrypt_block_inplace(inode,
+                                                       pages[0], CEPH_FSCRYPT_BLOCK_SIZE,
+                                                       offset_in_page(first_pos),
+                                                       first_pos >> CEPH_FSCRYPT_BLOCK_SHIFT);
+                                       if (ret < 0) {
+                                               ceph_release_page_vector(pages, num_pages);
+                                               break;
+                                       }
+                               }
+                               if (last) {
+                                       ret = ceph_fscrypt_decrypt_block_inplace(inode,
+                                                       pages[num_pages - 1],
+                                                       CEPH_FSCRYPT_BLOCK_SIZE,
+                                                       offset_in_page(last_pos),
+                                                       last_pos >> CEPH_FSCRYPT_BLOCK_SHIFT);
+                                       if (ret < 0) {
+                                               ceph_release_page_vector(pages, num_pages);
+                                               break;
+                                       }
+                               }
+                       }
                }
 
                left = len;
+               off = offset_in_page(pos);
                for (n = 0; n < num_pages; n++) {
-                       size_t plen = min_t(size_t, left, PAGE_SIZE);
-                       ret = copy_page_from_iter(pages[n], 0, plen, from);
+                       size_t plen = min_t(size_t, left, PAGE_SIZE - off);
+
+                       /* copy the data */
+                       ret = copy_page_from_iter(pages[n], off, plen, from);
                        if (ret != plen) {
                                ret = -EFAULT;
                                break;
                        }
+                       off = 0;
                        left -= ret;
                }
-
                if (ret < 0) {
+                       dout("sync_write write failed with %d\n", ret);
                        ceph_release_page_vector(pages, num_pages);
-                       goto out;
+                       break;
                }
 
-               req->r_inode = inode;
+               if (IS_ENCRYPTED(inode)) {
+                       ret = ceph_fscrypt_encrypt_pages(inode, pages,
+                                                        write_pos, write_len,
+                                                        GFP_KERNEL);
+                       if (ret < 0) {
+                               dout("encryption failed with %d\n", ret);
+                               ceph_release_page_vector(pages, num_pages);
+                               break;
+                       }
+               }
 
-               osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
-                                               false, true);
+               req = ceph_osdc_new_request(osdc, &ci->i_layout,
+                                           ci->i_vino, write_pos, &write_len,
+                                           rmw ? 1 : 0, rmw ? 2 : 1,
+                                           CEPH_OSD_OP_WRITE,
+                                           CEPH_OSD_FLAG_WRITE,
+                                           snapc, ci->i_truncate_seq,
+                                           ci->i_truncate_size, false);
+               if (IS_ERR(req)) {
+                       ret = PTR_ERR(req);
+                       ceph_release_page_vector(pages, num_pages);
+                       break;
+               }
 
+               dout("sync_write write op %lld~%llu\n", write_pos, write_len);
+               osd_req_op_extent_osd_data_pages(req, rmw ? 1 : 0, pages, write_len,
+                                                offset_in_page(write_pos), false,
+                                                true);
+               req->r_inode = inode;
                req->r_mtime = mtime;
-               ceph_osdc_start_request(&fsc->client->osdc, req);
-               ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+               /* Set up the assertion */
+               if (rmw) {
+                       /*
+                        * Set up the assertion. If we don't have a version
+                        * number, then the object doesn't exist yet. Use an
+                        * exclusive create instead of a version assertion in
+                        * that case.
+                        */
+                       if (assert_ver) {
+                               osd_req_op_init(req, 0, CEPH_OSD_OP_ASSERT_VER, 0);
+                               req->r_ops[0].assert_ver.ver = assert_ver;
+                       } else {
+                               osd_req_op_init(req, 0, CEPH_OSD_OP_CREATE,
+                                               CEPH_OSD_OP_FLAG_EXCL);
+                       }
+               }
+
+               ceph_osdc_start_request(osdc, req);
+               ret = ceph_osdc_wait_request(osdc, req);
 
                ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
                                          req->r_end_latency, len, ret);
-out:
                ceph_osdc_put_request(req);
                if (ret != 0) {
+                       dout("sync_write osd write returned %d\n", ret);
+                       /* Version changed! Must re-do the rmw cycle */
+                       if ((assert_ver && (ret == -ERANGE || ret == -EOVERFLOW)) ||
+                           (!assert_ver && ret == -EEXIST)) {
+                               /* We should only ever see this on a rmw */
+                               WARN_ON_ONCE(!rmw);
+
+                               /* The version should never go backward */
+                               WARN_ON_ONCE(ret == -EOVERFLOW);
+
+                               *from = saved_iter;
+
+                               /* FIXME: limit number of times we loop? */
+                               continue;
+                       }
                        ceph_set_error_write(ci);
                        break;
                }
 
                ceph_clear_error_write(ci);
+
+               /*
+                * We successfully wrote to a range of the file. Declare
+                * that region of the pagecache invalid.
+                */
+               ret = invalidate_inode_pages2_range(
+                               inode->i_mapping,
+                               pos >> PAGE_SHIFT,
+                               (pos + len - 1) >> PAGE_SHIFT);
+               if (ret < 0) {
+                       dout("invalidate_inode_pages2_range returned %d\n",
+                            ret);
+                       ret = 0;
+               }
                pos += len;
                written += len;
+               dout("sync_write written %d\n", written);
                if (pos > i_size_read(inode)) {
                        check_caps = ceph_inode_set_size(inode, pos);
                        if (check_caps)
@@ -1591,6 +1991,7 @@ out:
                ret = written;
                iocb->ki_pos = pos;
        }
+       dout("sync_write returning %d\n", ret);
        return ret;
 }
 
@@ -1648,7 +2049,9 @@ again:
                     ceph_cap_string(got));
 
                if (!ceph_has_inline_data(ci)) {
-                       if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+                       if (!retry_op &&
+                           (iocb->ki_flags & IOCB_DIRECT) &&
+                           !IS_ENCRYPTED(inode)) {
                                ret = ceph_direct_read_write(iocb, to,
                                                             NULL, NULL);
                                if (ret >= 0 && ret < len)
@@ -1934,7 +2337,7 @@ retry_snap:
 
                /* we might need to revert back to that point */
                data = *from;
-               if (iocb->ki_flags & IOCB_DIRECT)
+               if ((iocb->ki_flags & IOCB_DIRECT) && !IS_ENCRYPTED(inode))
                        written = ceph_direct_read_write(iocb, &data, snapc,
                                                         &prealloc_cf);
                else
@@ -2165,6 +2568,9 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
 
+       if (IS_ENCRYPTED(inode))
+               return -EOPNOTSUPP;
+
        prealloc_cf = ceph_alloc_cap_flush();
        if (!prealloc_cf)
                return -ENOMEM;
@@ -2486,6 +2892,10 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
                return -EOPNOTSUPP;
        }
 
+       /* Every encrypted inode gets its own key, so we can't offload them */
+       if (IS_ENCRYPTED(src_inode) || IS_ENCRYPTED(dst_inode))
+               return -EOPNOTSUPP;
+
        if (len < src_ci->i_layout.object_size)
                return -EOPNOTSUPP; /* no remote copy will be done */
 
index fd05d68..800ab79 100644 (file)
 #include <linux/random.h>
 #include <linux/sort.h>
 #include <linux/iversion.h>
+#include <linux/fscrypt.h>
 
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
+#include "crypto.h"
 #include <linux/ceph/decode.h>
 
 /*
@@ -33,6 +35,7 @@
  */
 
 static const struct inode_operations ceph_symlink_iops;
+static const struct inode_operations ceph_encrypted_symlink_iops;
 
 static void ceph_inode_work(struct work_struct *work);
 
@@ -52,17 +55,99 @@ static int ceph_set_ino_cb(struct inode *inode, void *data)
        return 0;
 }
 
-struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
+/**
+ * ceph_new_inode - allocate a new inode in advance of an expected create
+ * @dir: parent directory for new inode
+ * @dentry: dentry that may eventually point to new inode
+ * @mode: pointer to mode of new inode (handed to ceph_pre_init_acls())
+ * @as_ctx: pointer to inherited security context
+ *
+ * Allocate a new inode in advance of an operation to create a new inode.
+ * This allocates the inode and sets up the acl_sec_ctx with appropriate
+ * info for the new inode. On any failure the new inode is iput().
+ *
+ * Returns a pointer to the new inode or an ERR_PTR.
+ */
+struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
+                            umode_t *mode, struct ceph_acl_sec_ctx *as_ctx)
+{
+       int err;
+       struct inode *inode;
+
+       inode = new_inode(dir->i_sb);
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+
+       if (!S_ISLNK(*mode)) {  /* ACL pre-init is skipped for symlinks */
+               err = ceph_pre_init_acls(dir, mode, as_ctx);
+               if (err < 0)
+                       goto out_err;
+       }
+
+       inode->i_state = 0;
+       inode->i_mode = *mode;
+
+       err = ceph_security_init_secctx(dentry, *mode, as_ctx);
+       if (err < 0)
+               goto out_err;
+
+       /*
+        * Skip preparing the fscrypt context when the parent is a snapdir
+        * (CEPH_SNAPDIR), leaving that for the handle_reply().
+        */
+       if (ceph_snap(dir) != CEPH_SNAPDIR) {
+               err = ceph_fscrypt_prepare_context(dir, inode, as_ctx);
+               if (err)
+                       goto out_err;
+       }
+
+       return inode;
+out_err:
+       iput(inode);    /* drop the inode allocated above */
+       return ERR_PTR(err);
+}
+
+void ceph_as_ctx_to_req(struct ceph_mds_request *req,
+                       struct ceph_acl_sec_ctx *as_ctx)
+{
+       if (as_ctx->pagelist) { /* hand any pagelist over to the request */
+               req->r_pagelist = as_ctx->pagelist;
+               as_ctx->pagelist = NULL;        /* as_ctx gives up its pointer */
+       }
+       ceph_fscrypt_as_ctx_to_req(req, as_ctx);        /* fscrypt part is delegated */
+}
+
+/**
+ * ceph_get_inode - find or create/hash a new inode
+ * @sb: superblock to search and allocate in
+ * @vino: vino to search for
+ * @newino: optional new inode to insert if one isn't found (may be NULL)
+ *
+ * Search for or insert a new inode into the hash for the given vino, and
+ * return a reference to it. If @newino is non-NULL, its reference is consumed.
+ */
+struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino,
+                            struct inode *newino)
 {
        struct inode *inode;
 
        if (ceph_vino_is_reserved(vino))
                return ERR_PTR(-EREMOTEIO);
 
-       inode = iget5_locked(sb, (unsigned long)vino.ino, ceph_ino_compare,
-                            ceph_set_ino_cb, &vino);
-       if (!inode)
+       if (newino) {
+               inode = inode_insert5(newino, (unsigned long)vino.ino,
+                                     ceph_ino_compare, ceph_set_ino_cb, &vino);
+               if (inode != newino)
+                       iput(newino);
+       } else {
+               inode = iget5_locked(sb, (unsigned long)vino.ino,
+                                    ceph_ino_compare, ceph_set_ino_cb, &vino);
+       }
+
+       if (!inode) {
+               dout("No inode found for %llx.%llx\n", vino.ino, vino.snap);
                return ERR_PTR(-ENOMEM);
+       }
 
        dout("get_inode on %llu=%llx.%llx got %p new %d\n", ceph_present_inode(inode),
             ceph_vinop(inode), inode, !!(inode->i_state & I_NEW));
@@ -78,8 +163,9 @@ struct inode *ceph_get_snapdir(struct inode *parent)
                .ino = ceph_ino(parent),
                .snap = CEPH_SNAPDIR,
        };
-       struct inode *inode = ceph_get_inode(parent->i_sb, vino);
+       struct inode *inode = ceph_get_inode(parent->i_sb, vino, NULL);
        struct ceph_inode_info *ci = ceph_inode(inode);
+       int ret = -ENOTDIR;
 
        if (IS_ERR(inode))
                return inode;
@@ -105,6 +191,24 @@ struct inode *ceph_get_snapdir(struct inode *parent)
        ci->i_rbytes = 0;
        ci->i_btime = ceph_inode(parent)->i_btime;
 
+#ifdef CONFIG_FS_ENCRYPTION
+       /* if encrypted, just borrow fscrypt_auth from parent */
+       if (IS_ENCRYPTED(parent)) {
+               struct ceph_inode_info *pci = ceph_inode(parent);
+
+               ci->fscrypt_auth = kmemdup(pci->fscrypt_auth,
+                                          pci->fscrypt_auth_len,
+                                          GFP_KERNEL);
+               if (ci->fscrypt_auth) {
+                       inode->i_flags |= S_ENCRYPTED;
+                       ci->fscrypt_auth_len = pci->fscrypt_auth_len;
+               } else {
+                       dout("Failed to alloc snapdir fscrypt_auth\n");
+                       ret = -ENOMEM;
+                       goto err;
+               }
+       }
+#endif
        if (inode->i_state & I_NEW) {
                inode->i_op = &ceph_snapdir_iops;
                inode->i_fop = &ceph_snapdir_fops;
@@ -118,7 +222,7 @@ err:
                discard_new_inode(inode);
        else
                iput(inode);
-       return ERR_PTR(-ENOTDIR);
+       return ERR_PTR(ret);
 }
 
 const struct inode_operations ceph_file_iops = {
@@ -517,6 +621,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_truncate_seq = 0;
        ci->i_truncate_size = 0;
        ci->i_truncate_pending = 0;
+       ci->i_truncate_pagecache_size = 0;
 
        ci->i_max_size = 0;
        ci->i_reported_size = 0;
@@ -547,6 +652,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        INIT_WORK(&ci->i_work, ceph_inode_work);
        ci->i_work_mask = 0;
        memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
+#ifdef CONFIG_FS_ENCRYPTION
+       ci->fscrypt_auth = NULL;
+       ci->fscrypt_auth_len = 0;
+#endif
        return &ci->netfs.inode;
 }
 
@@ -555,6 +664,10 @@ void ceph_free_inode(struct inode *inode)
        struct ceph_inode_info *ci = ceph_inode(inode);
 
        kfree(ci->i_symlink);
+#ifdef CONFIG_FS_ENCRYPTION
+       kfree(ci->fscrypt_auth);
+#endif
+       fscrypt_free_inode(inode);
        kmem_cache_free(ceph_inode_cachep, ci);
 }
 
@@ -575,6 +688,7 @@ void ceph_evict_inode(struct inode *inode)
        clear_inode(inode);
 
        ceph_fscache_unregister_inode_cookie(ci);
+       fscrypt_put_encryption_info(inode);
 
        __ceph_remove_caps(ci);
 
@@ -650,7 +764,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
                        ceph_fscache_update(inode);
                ci->i_reported_size = size;
                if (truncate_seq != ci->i_truncate_seq) {
-                       dout("truncate_seq %u -> %u\n",
+                       dout("%s truncate_seq %u -> %u\n", __func__,
                             ci->i_truncate_seq, truncate_seq);
                        ci->i_truncate_seq = truncate_seq;
 
@@ -674,11 +788,26 @@ int ceph_fill_file_size(struct inode *inode, int issued,
                        }
                }
        }
-       if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0 &&
-           ci->i_truncate_size != truncate_size) {
-               dout("truncate_size %lld -> %llu\n", ci->i_truncate_size,
-                    truncate_size);
+
+       /*
+        * It's possible that the new sizes of the two consecutive
+        * size truncations will be in the same fscrypt last block,
+        * and we need to truncate the corresponding page caches
+        * anyway.
+        */
+       if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0) {
+               dout("%s truncate_size %lld -> %llu, encrypted %d\n", __func__,
+                    ci->i_truncate_size, truncate_size, !!IS_ENCRYPTED(inode));
+
                ci->i_truncate_size = truncate_size;
+
+               if (IS_ENCRYPTED(inode)) {
+                       dout("%s truncate_pagecache_size %lld -> %llu\n",
+                            __func__, ci->i_truncate_pagecache_size, size);
+                       ci->i_truncate_pagecache_size = size;
+               } else {
+                       ci->i_truncate_pagecache_size = truncate_size;
+               }
        }
        return queue_trunc;
 }
@@ -752,6 +881,34 @@ void ceph_fill_file_time(struct inode *inode, int issued,
                     inode, time_warp_seq, ci->i_time_warp_seq);
 }
 
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym)
+{
+       int declen;
+       u8 *sym;
+
+       sym = kmalloc(enclen + 1, GFP_NOFS);    /* one extra byte for the NUL */
+       if (!sym)
+               return -ENOMEM;
+
+       declen = ceph_base64_decode(encsym, enclen, sym);
+       if (declen < 0) {
+               pr_err("%s: can't decode symlink (%d). Content: %.*s\n",
+                      __func__, declen, enclen, encsym);
+               kfree(sym);
+               return -EIO;
+       }
+       sym[declen] = '\0';     /* NUL directly after the decoded bytes */
+       *decsym = sym;          /* buffer is handed to the caller via @decsym */
+       return declen;          /* decoded length, NUL not counted */
+}
+#else
+static int decode_encrypted_symlink(const char *encsym, int symlen, u8 **decsym)
+{
+       return -EOPNOTSUPP;     /* built without CONFIG_FS_ENCRYPTION */
+}
+#endif
+
 /*
  * Populate an inode based on info from mds.  May be called on new or
  * existing inodes.
@@ -857,15 +1014,20 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
        issued |= __ceph_caps_dirty(ci);
        new_issued = ~issued & info_caps;
 
-       /* directories have fl_stripe_unit set to zero */
-       if (le32_to_cpu(info->layout.fl_stripe_unit))
-               inode->i_blkbits =
-                       fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
-       else
-               inode->i_blkbits = CEPH_BLOCK_SHIFT;
-
        __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
 
+#ifdef CONFIG_FS_ENCRYPTION
+       if (iinfo->fscrypt_auth_len &&
+           ((inode->i_state & I_NEW) || (ci->fscrypt_auth_len == 0))) {
+               kfree(ci->fscrypt_auth);
+               ci->fscrypt_auth_len = iinfo->fscrypt_auth_len;
+               ci->fscrypt_auth = iinfo->fscrypt_auth;
+               iinfo->fscrypt_auth = NULL;
+               iinfo->fscrypt_auth_len = 0;
+               inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED);
+       }
+#endif
+
        if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
            (issued & CEPH_CAP_AUTH_EXCL) == 0) {
                inode->i_mode = mode;
@@ -878,6 +1040,15 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
                ceph_decode_timespec64(&ci->i_snap_btime, &iinfo->snap_btime);
        }
 
+       /* directories have fl_stripe_unit set to zero */
+       if (IS_ENCRYPTED(inode))
+               inode->i_blkbits = CEPH_FSCRYPT_BLOCK_SHIFT;
+       else if (le32_to_cpu(info->layout.fl_stripe_unit))
+               inode->i_blkbits =
+                       fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+       else
+               inode->i_blkbits = CEPH_BLOCK_SHIFT;
+
        if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
            (issued & CEPH_CAP_LINK_EXCL) == 0)
                set_nlink(inode, le32_to_cpu(info->nlink));
@@ -899,6 +1070,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
 
        if (new_version ||
            (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+               u64 size = le64_to_cpu(info->size);
                s64 old_pool = ci->i_layout.pool_id;
                struct ceph_string *old_ns;
 
@@ -912,10 +1084,22 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
 
                pool_ns = old_ns;
 
+               if (IS_ENCRYPTED(inode) && size &&
+                   iinfo->fscrypt_file_len == sizeof(__le64)) {
+                       u64 fsize = __le64_to_cpu(*(__le64 *)iinfo->fscrypt_file);
+
+                       if (size == round_up(fsize, CEPH_FSCRYPT_BLOCK_SIZE)) {
+                               size = fsize;   /* use the size recorded in fscrypt_file */
+                       } else {
+                               pr_warn("fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n",
+                                       size, fsize);
+                       }
+               }
+
                queue_trunc = ceph_fill_file_size(inode, issued,
                                        le32_to_cpu(info->truncate_seq),
                                        le64_to_cpu(info->truncate_size),
-                                       le64_to_cpu(info->size));
+                                       size);
                /* only update max_size on auth cap */
                if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
                    ci->i_max_size != le64_to_cpu(info->max_size)) {
@@ -975,26 +1159,42 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
                inode->i_fop = &ceph_file_fops;
                break;
        case S_IFLNK:
-               inode->i_op = &ceph_symlink_iops;
                if (!ci->i_symlink) {
                        u32 symlen = iinfo->symlink_len;
                        char *sym;
 
                        spin_unlock(&ci->i_ceph_lock);
 
-                       if (symlen != i_size_read(inode)) {
-                               pr_err("%s %llx.%llx BAD symlink "
-                                       "size %lld\n", __func__,
-                                       ceph_vinop(inode),
-                                       i_size_read(inode));
+                       if (IS_ENCRYPTED(inode)) {
+                               if (symlen != i_size_read(inode))
+                                       pr_err("%s %llx.%llx BAD symlink size %lld\n",
+                                               __func__, ceph_vinop(inode),
+                                               i_size_read(inode));
+
+                               err = decode_encrypted_symlink(iinfo->symlink,
+                                                              symlen, (u8 **)&sym);
+                               if (err < 0) {
+                                       pr_err("%s decoding encrypted symlink failed: %d\n",
+                                               __func__, err);
+                                       goto out;
+                               }
+                               symlen = err;
                                i_size_write(inode, symlen);
                                inode->i_blocks = calc_inode_blocks(symlen);
-                       }
+                       } else {
+                               if (symlen != i_size_read(inode)) {
+                                       pr_err("%s %llx.%llx BAD symlink size %lld\n",
+                                               __func__, ceph_vinop(inode),
+                                               i_size_read(inode));
+                                       i_size_write(inode, symlen);
+                                       inode->i_blocks = calc_inode_blocks(symlen);
+                               }
 
-                       err = -ENOMEM;
-                       sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
-                       if (!sym)
-                               goto out;
+                               err = -ENOMEM;
+                               sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
+                               if (!sym)
+                                       goto out;
+                       }
 
                        spin_lock(&ci->i_ceph_lock);
                        if (!ci->i_symlink)
@@ -1002,7 +1202,17 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
                        else
                                kfree(sym); /* lost a race */
                }
-               inode->i_link = ci->i_symlink;
+
+               if (IS_ENCRYPTED(inode)) {
+                       /*
+                        * Encrypted symlinks need to be decrypted before we can
+                        * cache their targets in i_link. Don't touch it here.
+                        */
+                       inode->i_op = &ceph_encrypted_symlink_iops;
+               } else {
+                       inode->i_link = ci->i_symlink;
+                       inode->i_op = &ceph_symlink_iops;
+               }
                break;
        case S_IFDIR:
                inode->i_op = &ceph_dir_iops;
@@ -1310,8 +1520,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
                if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
                    test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
                    !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
+                       bool is_nokey = false;
                        struct qstr dname;
                        struct dentry *dn, *parent;
+                       struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+                       struct ceph_fname fname = { .dir        = dir,
+                                                   .name       = rinfo->dname,
+                                                   .ctext      = rinfo->altname,
+                                                   .name_len   = rinfo->dname_len,
+                                                   .ctext_len  = rinfo->altname_len };
 
                        BUG_ON(!rinfo->head->is_target);
                        BUG_ON(req->r_dentry);
@@ -1319,8 +1536,20 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
                        parent = d_find_any_alias(dir);
                        BUG_ON(!parent);
 
-                       dname.name = rinfo->dname;
-                       dname.len = rinfo->dname_len;
+                       err = ceph_fname_alloc_buffer(dir, &oname);
+                       if (err < 0) {
+                               dput(parent);
+                               goto done;
+                       }
+
+                       err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
+                       if (err < 0) {
+                               dput(parent);
+                               ceph_fname_free_buffer(dir, &oname);
+                               goto done;
+                       }
+                       dname.name = oname.name;
+                       dname.len = oname.len;
                        dname.hash = full_name_hash(parent, dname.name, dname.len);
                        tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
                        tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
@@ -1335,9 +1564,15 @@ retry_lookup:
                                     dname.len, dname.name, dn);
                                if (!dn) {
                                        dput(parent);
+                                       ceph_fname_free_buffer(dir, &oname);
                                        err = -ENOMEM;
                                        goto done;
                                }
+                               if (is_nokey) {
+                                       spin_lock(&dn->d_lock);
+                                       dn->d_flags |= DCACHE_NOKEY_NAME;
+                                       spin_unlock(&dn->d_lock);
+                               }
                                err = 0;
                        } else if (d_really_is_positive(dn) &&
                                   (ceph_ino(d_inode(dn)) != tvino.ino ||
@@ -1349,6 +1584,7 @@ retry_lookup:
                                dput(dn);
                                goto retry_lookup;
                        }
+                       ceph_fname_free_buffer(dir, &oname);
 
                        req->r_dentry = dn;
                        dput(parent);
@@ -1552,7 +1788,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
                vino.ino = le64_to_cpu(rde->inode.in->ino);
                vino.snap = le64_to_cpu(rde->inode.in->snapid);
 
-               in = ceph_get_inode(req->r_dentry->d_sb, vino);
+               in = ceph_get_inode(req->r_dentry->d_sb, vino, NULL);
                if (IS_ERR(in)) {
                        err = PTR_ERR(in);
                        dout("new_inode badness got %d\n", err);
@@ -1630,7 +1866,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                             struct ceph_mds_session *session)
 {
        struct dentry *parent = req->r_dentry;
-       struct ceph_inode_info *ci = ceph_inode(d_inode(parent));
+       struct inode *inode = d_inode(parent);
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
        struct qstr dname;
        struct dentry *dn;
@@ -1704,9 +1941,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                tvino.snap = le64_to_cpu(rde->inode.in->snapid);
 
                if (rinfo->hash_order) {
-                       u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
-                                                rde->name, rde->name_len);
-                       hash = ceph_frag_value(hash);
+                       u32 hash = ceph_frag_value(rde->raw_hash);
                        if (hash != last_hash)
                                fpos_offset = 2;
                        last_hash = hash;
@@ -1729,6 +1964,11 @@ retry_lookup:
                                err = -ENOMEM;
                                goto out;
                        }
+                       if (rde->is_nokey) {
+                               spin_lock(&dn->d_lock);
+                               dn->d_flags |= DCACHE_NOKEY_NAME;
+                               spin_unlock(&dn->d_lock);
+                       }
                } else if (d_really_is_positive(dn) &&
                           (ceph_ino(d_inode(dn)) != tvino.ino ||
                            ceph_snap(d_inode(dn)) != tvino.snap)) {
@@ -1754,7 +1994,7 @@ retry_lookup:
                if (d_really_is_positive(dn)) {
                        in = d_inode(dn);
                } else {
-                       in = ceph_get_inode(parent->d_sb, tvino);
+                       in = ceph_get_inode(parent->d_sb, tvino, NULL);
                        if (IS_ERR(in)) {
                                dout("new_inode badness\n");
                                d_drop(dn);
@@ -1927,7 +2167,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
 retry:
        spin_lock(&ci->i_ceph_lock);
        if (ci->i_truncate_pending == 0) {
-               dout("__do_pending_vmtruncate %p none pending\n", inode);
+               dout("%s %p none pending\n", __func__, inode);
                spin_unlock(&ci->i_ceph_lock);
                mutex_unlock(&ci->i_truncate_mutex);
                return;
@@ -1939,8 +2179,7 @@ retry:
         */
        if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
                spin_unlock(&ci->i_ceph_lock);
-               dout("__do_pending_vmtruncate %p flushing snaps first\n",
-                    inode);
+               dout("%s %p flushing snaps first\n", __func__, inode);
                filemap_write_and_wait_range(&inode->i_data, 0,
                                             inode->i_sb->s_maxbytes);
                goto retry;
@@ -1949,9 +2188,9 @@ retry:
        /* there should be no reader or writer */
        WARN_ON_ONCE(ci->i_rd_ref || ci->i_wr_ref);
 
-       to = ci->i_truncate_size;
+       to = ci->i_truncate_pagecache_size;
        wrbuffer_refs = ci->i_wrbuffer_ref;
-       dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
+       dout("%s %p (%d) to %lld\n", __func__, inode,
             ci->i_truncate_pending, to);
        spin_unlock(&ci->i_ceph_lock);
 
@@ -1959,7 +2198,7 @@ retry:
        truncate_pagecache(inode, to);
 
        spin_lock(&ci->i_ceph_lock);
-       if (to == ci->i_truncate_size) {
+       if (to == ci->i_truncate_pagecache_size) {
                ci->i_truncate_pending = 0;
                finish = 1;
        }
@@ -2000,6 +2239,32 @@ static void ceph_inode_work(struct work_struct *work)
        iput(inode);
 }
 
+/*
+ * get_link handler for encrypted symlinks: hands the stored symlink
+ * target (ci->i_symlink, i_size bytes long) to fscrypt_get_symlink().
+ * Returns ERR_PTR(-ECHILD) when no dentry is supplied.
+ */
+static const char *ceph_encrypted_get_link(struct dentry *dentry,
+                                          struct inode *inode,
+                                          struct delayed_call *done)
+{
+       struct ceph_inode_info *ci;
+
+       if (dentry == NULL)
+               return ERR_PTR(-ECHILD);
+
+       ci = ceph_inode(inode);
+       return fscrypt_get_symlink(inode, ci->i_symlink, i_size_read(inode),
+                                  done);
+}
+
+/*
+ * getattr handler for encrypted symlinks: run the regular ceph_getattr()
+ * first and, only if that succeeded, let fscrypt_symlink_getattr()
+ * post-process the resulting kstat.
+ */
+static int ceph_encrypted_symlink_getattr(struct mnt_idmap *idmap,
+                                         const struct path *path,
+                                         struct kstat *stat, u32 request_mask,
+                                         unsigned int query_flags)
+{
+       int err = ceph_getattr(idmap, path, stat, request_mask, query_flags);
+
+       return err ? err : fscrypt_symlink_getattr(path, stat);
+}
+
 /*
  * symlinks
  */
@@ -2010,20 +2275,173 @@ static const struct inode_operations ceph_symlink_iops = {
        .listxattr = ceph_listxattr,
 };
 
-int __ceph_setattr(struct inode *inode, struct iattr *attr)
+/*
+ * inode_operations table that pairs the encrypted-symlink get_link and
+ * getattr helpers with the regular ceph setattr and listxattr handlers.
+ */
+static const struct inode_operations ceph_encrypted_symlink_iops = {
+       .get_link = ceph_encrypted_get_link,
+       .setattr = ceph_setattr,
+       .getattr = ceph_encrypted_symlink_getattr,
+       .listxattr = ceph_listxattr,
+};
+
+/*
+ * Transfer the encrypted last block to the MDS, which will help update
+ * it when truncating to a smaller size.
+ *
+ * The block containing the new EOF (attr->ia_size) is read back, everything
+ * past the new size is zeroed, the block is re-encrypted in place and then
+ * appended, behind a ceph_fscrypt_truncate_size_header, to a pagelist that
+ * is attached to @req.
+ *
+ * Returns 0 on success, otherwise the error from the failing step; on
+ * failure no pagelist is attached to the request.
+ *
+ * We don't support a PAGE_SIZE that is smaller than the
+ * CEPH_FSCRYPT_BLOCK_SIZE.
+ */
+static int fill_fscrypt_truncate(struct inode *inode,
+                                struct ceph_mds_request *req,
+                                struct iattr *attr)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       /* offset of the new EOF inside its fscrypt block */
+       int boff = attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE;
+       /* file offset at which that block starts */
+       loff_t pos, orig_pos = round_down(attr->ia_size,
+                                         CEPH_FSCRYPT_BLOCK_SIZE);
+       u64 block = orig_pos >> CEPH_FSCRYPT_BLOCK_SHIFT;
+       struct ceph_pagelist *pagelist = NULL;
+       struct kvec iov = {0};
+       struct iov_iter iter;
+       struct page *page = NULL;
+       struct ceph_fscrypt_truncate_size_header header;
+       int retry_op = 0;
+       int len = CEPH_FSCRYPT_BLOCK_SIZE;
+       loff_t i_size = i_size_read(inode);
+       int got, ret, issued;
+       u64 objver;
+
+       ret = __ceph_get_caps(inode, NULL, CEPH_CAP_FILE_RD, 0, -1, &got);
+       if (ret < 0)
+               return ret;
+
+       issued = __ceph_caps_issued(ci, NULL);
+
+       dout("%s size %lld -> %lld got cap refs on %s, issued %s\n", __func__,
+            i_size, attr->ia_size, ceph_cap_string(got),
+            ceph_cap_string(issued));
+
+       /* Try to writeback the dirty pagecaches */
+       if (issued & (CEPH_CAP_FILE_BUFFER)) {
+               /*
+                * lend is the inclusive last byte of the block that starts
+                * at orig_pos, so the block *size* (not the shift count)
+                * must be added here.
+                */
+               loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1;
+
+               ret = filemap_write_and_wait_range(inode->i_mapping,
+                                                  orig_pos, lend);
+               if (ret < 0)
+                       goto out;
+       }
+
+       page = __page_cache_alloc(GFP_KERNEL);
+       if (page == NULL) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       pagelist = ceph_pagelist_alloc(GFP_KERNEL);
+       if (!pagelist) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       /* read one full fscrypt block, starting at orig_pos, into the page */
+       iov.iov_base = kmap_local_page(page);
+       iov.iov_len = len;
+       iov_iter_kvec(&iter, READ, &iov, 1, len);
+
+       pos = orig_pos;
+       ret = __ceph_sync_read(inode, &pos, &iter, &retry_op, &objver);
+       if (ret < 0)
+               goto out;
+
+       /* Fill in the header first */
+       header.ver = 1;
+       header.compat = 1;
+       header.change_attr = cpu_to_le64(inode_peek_iversion_raw(inode));
+
+       /*
+        * Always set the block_size to CEPH_FSCRYPT_BLOCK_SIZE,
+        * because the MDS may need this to do the truncate.
+        */
+       header.block_size = cpu_to_le32(CEPH_FSCRYPT_BLOCK_SIZE);
+
+       /*
+        * If we hit a hole here, we should just skip filling
+        * the fscrypt for the request, because once fscrypt
+        * is enabled, the file will be split into many blocks
+        * with the size of CEPH_FSCRYPT_BLOCK_SIZE; if there
+        * is a hole, the hole size should be a multiple of the
+        * block size.
+        *
+        * If the Rados object doesn't exist, objver will be set to 0.
+        */
+       if (!objver) {
+               dout("%s hit hole, ppos %lld < size %lld\n", __func__,
+                    pos, i_size);
+
+               header.data_len = cpu_to_le32(8 + 8 + 4);
+               header.file_offset = 0;
+               ret = 0;
+       } else {
+               header.data_len = cpu_to_le32(8 + 8 + 4 + CEPH_FSCRYPT_BLOCK_SIZE);
+               header.file_offset = cpu_to_le64(orig_pos);
+
+               dout("%s encrypt block boff/bsize %d/%lu\n", __func__,
+                    boff, CEPH_FSCRYPT_BLOCK_SIZE);
+
+               /* truncate and zero out the extra contents for the last block */
+               memset(iov.iov_base + boff, 0, PAGE_SIZE - boff);
+
+               /* encrypt the last block */
+               ret = ceph_fscrypt_encrypt_block_inplace(inode, page,
+                                                   CEPH_FSCRYPT_BLOCK_SIZE,
+                                                   0, block,
+                                                   GFP_KERNEL);
+               if (ret)
+                       goto out;
+       }
+
+       /* Insert the header */
+       ret = ceph_pagelist_append(pagelist, &header, sizeof(header));
+       if (ret)
+               goto out;
+
+       /*
+        * NOTE(review): block_size is assigned unconditionally above, so
+        * this branch also runs in the hole case -- confirm that is intended.
+        */
+       if (header.block_size) {
+               /* Append the last block contents to pagelist */
+               ret = ceph_pagelist_append(pagelist, iov.iov_base,
+                                          CEPH_FSCRYPT_BLOCK_SIZE);
+               if (ret)
+                       goto out;
+       }
+       /* everything succeeded: hand the pagelist over to the request */
+       req->r_pagelist = pagelist;
+out:
+       dout("%s %p size dropping cap refs on %s\n", __func__,
+            inode, ceph_cap_string(got));
+       ceph_put_cap_refs(ci, got);
+       if (iov.iov_base)
+               kunmap_local(iov.iov_base);
+       if (page)
+               __free_pages(page, 0);
+       /* the pagelist is only released here when it was not handed to req */
+       if (ret && pagelist)
+               ceph_pagelist_release(pagelist);
+       return ret;
+}
+
+int __ceph_setattr(struct inode *inode, struct iattr *attr,
+                  struct ceph_iattr *cia)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        unsigned int ia_valid = attr->ia_valid;
        struct ceph_mds_request *req;
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_cap_flush *prealloc_cf;
+       loff_t isize = i_size_read(inode);
        int issued;
        int release = 0, dirtied = 0;
        int mask = 0;
        int err = 0;
        int inode_dirty_flags = 0;
        bool lock_snap_rwsem = false;
+       bool fill_fscrypt;
+       int truncate_retry = 20; /* The RMW will take around 50ms */
 
+retry:
        prealloc_cf = ceph_alloc_cap_flush();
        if (!prealloc_cf)
                return -ENOMEM;
@@ -2035,6 +2453,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                return PTR_ERR(req);
        }
 
+       fill_fscrypt = false;
        spin_lock(&ci->i_ceph_lock);
        issued = __ceph_caps_issued(ci, NULL);
 
@@ -2050,6 +2469,43 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
        }
 
        dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       if (cia && cia->fscrypt_auth) {
+               u32 len = ceph_fscrypt_auth_len(cia->fscrypt_auth);
+
+               if (len > sizeof(*cia->fscrypt_auth)) {
+                       err = -EINVAL;
+                       spin_unlock(&ci->i_ceph_lock);
+                       goto out;
+               }
+
+               dout("setattr %llx:%llx fscrypt_auth len %u to %u)\n",
+                       ceph_vinop(inode), ci->fscrypt_auth_len, len);
+
+               /* It should never be re-set once set */
+               WARN_ON_ONCE(ci->fscrypt_auth);
+
+               if (issued & CEPH_CAP_AUTH_EXCL) {
+                       dirtied |= CEPH_CAP_AUTH_EXCL;
+                       kfree(ci->fscrypt_auth);
+                       ci->fscrypt_auth = (u8 *)cia->fscrypt_auth;
+                       ci->fscrypt_auth_len = len;
+               } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
+                          ci->fscrypt_auth_len != len ||
+                          memcmp(ci->fscrypt_auth, cia->fscrypt_auth, len)) {
+                       req->r_fscrypt_auth = cia->fscrypt_auth;
+                       mask |= CEPH_SETATTR_FSCRYPT_AUTH;
+                       release |= CEPH_CAP_AUTH_SHARED;
+               }
+               cia->fscrypt_auth = NULL;
+       }
+#else
+       if (cia && cia->fscrypt_auth) {
+               err = -EINVAL;
+               spin_unlock(&ci->i_ceph_lock);
+               goto out;
+       }
+#endif /* CONFIG_FS_ENCRYPTION */
 
        if (ia_valid & ATTR_UID) {
                dout("setattr %p uid %d -> %d\n", inode,
@@ -2119,10 +2575,27 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                }
        }
        if (ia_valid & ATTR_SIZE) {
-               loff_t isize = i_size_read(inode);
-
                dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
-               if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
+               /*
+                * Only when the new size is smaller and not aligned to
+                * CEPH_FSCRYPT_BLOCK_SIZE is the RMW needed.
+                */
+               if (IS_ENCRYPTED(inode) && attr->ia_size < isize &&
+                   (attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE)) {
+                       mask |= CEPH_SETATTR_SIZE;
+                       release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
+                                  CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
+                       set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+                       mask |= CEPH_SETATTR_FSCRYPT_FILE;
+                       req->r_args.setattr.size =
+                               cpu_to_le64(round_up(attr->ia_size,
+                                                    CEPH_FSCRYPT_BLOCK_SIZE));
+                       req->r_args.setattr.old_size =
+                               cpu_to_le64(round_up(isize,
+                                                    CEPH_FSCRYPT_BLOCK_SIZE));
+                       req->r_fscrypt_file = attr->ia_size;
+                       fill_fscrypt = true;
+               } else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
                        if (attr->ia_size > isize) {
                                i_size_write(inode, attr->ia_size);
                                inode->i_blocks = calc_inode_blocks(attr->ia_size);
@@ -2132,11 +2605,24 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                        }
                } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
                           attr->ia_size != isize) {
-                       req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
-                       req->r_args.setattr.old_size = cpu_to_le64(isize);
                        mask |= CEPH_SETATTR_SIZE;
                        release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
                                   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
+                       if (IS_ENCRYPTED(inode) && attr->ia_size) {
+                               set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+                               mask |= CEPH_SETATTR_FSCRYPT_FILE;
+                               req->r_args.setattr.size =
+                                       cpu_to_le64(round_up(attr->ia_size,
+                                                            CEPH_FSCRYPT_BLOCK_SIZE));
+                               req->r_args.setattr.old_size =
+                                       cpu_to_le64(round_up(isize,
+                                                            CEPH_FSCRYPT_BLOCK_SIZE));
+                               req->r_fscrypt_file = attr->ia_size;
+                       } else {
+                               req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
+                               req->r_args.setattr.old_size = cpu_to_le64(isize);
+                               req->r_fscrypt_file = 0;
+                       }
                }
        }
        if (ia_valid & ATTR_MTIME) {
@@ -2199,8 +2685,10 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 
        release &= issued;
        spin_unlock(&ci->i_ceph_lock);
-       if (lock_snap_rwsem)
+       if (lock_snap_rwsem) {
                up_read(&mdsc->snap_rwsem);
+               lock_snap_rwsem = false;
+       }
 
        if (inode_dirty_flags)
                __mark_inode_dirty(inode, inode_dirty_flags);
@@ -2212,8 +2700,29 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                req->r_args.setattr.mask = cpu_to_le32(mask);
                req->r_num_caps = 1;
                req->r_stamp = attr->ia_ctime;
+               if (fill_fscrypt) {
+                       err = fill_fscrypt_truncate(inode, req, attr);
+                       if (err)
+                               goto out;
+               }
+
+               /*
+                * The truncate request will return -EAGAIN when the
+                * last block has been updated just before the MDS
+                * successfully gets the xlock for the FILE lock. To
+                * avoid corrupting the file contents we need to retry
+                * it.
+                */
                err = ceph_mdsc_do_request(mdsc, NULL, req);
+               if (err == -EAGAIN && truncate_retry--) {
+                       dout("setattr %p result=%d (%s locally, %d remote), retry it!\n",
+                            inode, err, ceph_cap_string(dirtied), mask);
+                       ceph_mdsc_put_request(req);
+                       ceph_free_cap_flush(prealloc_cf);
+                       goto retry;
+               }
        }
+out:
        dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
             ceph_cap_string(dirtied), mask);
 
@@ -2242,6 +2751,10 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
        if (ceph_inode_is_shutdown(inode))
                return -ESTALE;
 
+       err = fscrypt_prepare_setattr(dentry, attr);
+       if (err)
+               return err;
+
        err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
        if (err != 0)
                return err;
@@ -2254,7 +2767,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
            ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
                return -EDQUOT;
 
-       err = __ceph_setattr(inode, attr);
+       err = __ceph_setattr(inode, attr, NULL);
 
        if (err >= 0 && (attr->ia_valid & ATTR_MODE))
                err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode);
@@ -2525,8 +3038,12 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path,
                        stat->nlink = 1 + 1 + ci->i_subdirs;
        }
 
-       stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
        stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC;
+       if (IS_ENCRYPTED(inode))
+               stat->attributes |= STATX_ATTR_ENCRYPTED;
+       stat->attributes_mask |= (STATX_ATTR_CHANGE_MONOTONIC |
+                                 STATX_ATTR_ENCRYPTED);
+
        stat->result_mask = request_mask & valid_mask;
        return err;
 }
index deac817..91a8491 100644 (file)
@@ -6,6 +6,7 @@
 #include "mds_client.h"
 #include "ioctl.h"
 #include <linux/ceph/striper.h>
+#include <linux/fscrypt.h>
 
 /*
  * ioctls
@@ -268,9 +269,96 @@ static long ceph_ioctl_syncio(struct file *file)
        return 0;
 }
 
+/*
+ * Check whether the MDS advertises CEPHFS_FEATURE_ALTERNATE_NAME.
+ *
+ * Only the first non-NULL entry of mdsc->sessions is consulted, under
+ * mdsc->mutex.  Returns 0 if that session has the feature bit set and
+ * -EOPNOTSUPP otherwise (including when there is no session at all).
+ */
+static int vet_mds_for_fscrypt(struct file *file)
+{
+       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(file_inode(file)->i_sb);
+       int err = -EOPNOTSUPP;
+       int idx;
+
+       mutex_lock(&mdsc->mutex);
+       for (idx = 0; idx < mdsc->max_sessions; idx++) {
+               struct ceph_mds_session *session = mdsc->sessions[idx];
+
+               if (session) {
+                       /* the first session found decides the outcome */
+                       if (test_bit(CEPHFS_FEATURE_ALTERNATE_NAME,
+                                    &session->s_features))
+                               err = 0;
+                       break;
+               }
+       }
+       mutex_unlock(&mdsc->mutex);
+       return err;
+}
+
+/*
+ * FS_IOC_SET_ENCRYPTION_POLICY backend.
+ *
+ * Rejects inodes whose layout has a stripe_count above 1 (-EINVAL) and
+ * MDSes that fail vet_mds_for_fscrypt(), then calls
+ * fscrypt_ioctl_set_policy() while holding a CEPH_CAP_FILE_SHARED
+ * reference.  Returns the result of the first step that fails, or of
+ * fscrypt_ioctl_set_policy().
+ */
+static long ceph_set_encryption_policy(struct file *file, unsigned long arg)
+{
+       struct inode *inode = file_inode(file);
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       int caps_got = 0;
+       int err;
+
+       /* encrypted directories can't have striped layout */
+       if (ci->i_layout.stripe_count > 1)
+               return -EINVAL;
+
+       err = vet_mds_for_fscrypt(file);
+       if (err)
+               return err;
+
+       /*
+        * Hold these caps across the policy change so that we _know_ the
+        * rstats check in the empty_dir check is reliable.
+        */
+       err = ceph_get_caps(file, CEPH_CAP_FILE_SHARED, 0, -1, &caps_got);
+       if (err)
+               return err;
+
+       err = fscrypt_ioctl_set_policy(file, (const void __user *)arg);
+
+       /* drop whatever cap references ceph_get_caps() handed us */
+       if (caps_got)
+               ceph_put_cap_refs(ci, caps_got);
+
+       return err;
+}
+
+/*
+ * Translate an ioctl command number into a short human-readable name.
+ * Commands that are not in the table yield "unknown".
+ */
+static const char *ceph_ioctl_cmd_name(const unsigned int cmd)
+{
+       static const struct {
+               unsigned int cmd;
+               const char *name;
+       } cmd_names[] = {
+               { CEPH_IOC_GET_LAYOUT, "get_layout" },
+               { CEPH_IOC_SET_LAYOUT, "set_layout" },
+               { CEPH_IOC_SET_LAYOUT_POLICY, "set_layout_policy" },
+               { CEPH_IOC_GET_DATALOC, "get_dataloc" },
+               { CEPH_IOC_LAZYIO, "lazyio" },
+               { CEPH_IOC_SYNCIO, "syncio" },
+               { FS_IOC_SET_ENCRYPTION_POLICY, "set_encryption_policy" },
+               { FS_IOC_GET_ENCRYPTION_POLICY, "get_encryption_policy" },
+               { FS_IOC_GET_ENCRYPTION_POLICY_EX, "get_encryption_policy_ex" },
+               { FS_IOC_ADD_ENCRYPTION_KEY, "add_encryption_key" },
+               { FS_IOC_REMOVE_ENCRYPTION_KEY, "remove_encryption_key" },
+               { FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS,
+                 "remove_encryption_key_all_users" },
+               { FS_IOC_GET_ENCRYPTION_KEY_STATUS, "get_encryption_key_status" },
+               { FS_IOC_GET_ENCRYPTION_NONCE, "get_encryption_nonce" },
+               { 0, NULL },    /* sentinel: name == NULL ends the table */
+       };
+       int i;
+
+       for (i = 0; cmd_names[i].name; i++) {
+               if (cmd_names[i].cmd == cmd)
+                       return cmd_names[i].name;
+       }
+       return "unknown";
+}
+
 long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-       dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
+       int ret;
+
+       dout("ioctl file %p cmd %s arg %lu\n", file,
+            ceph_ioctl_cmd_name(cmd), arg);
        switch (cmd) {
        case CEPH_IOC_GET_LAYOUT:
                return ceph_ioctl_get_layout(file, (void __user *)arg);
@@ -289,6 +377,43 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
        case CEPH_IOC_SYNCIO:
                return ceph_ioctl_syncio(file);
+
+       case FS_IOC_SET_ENCRYPTION_POLICY:
+               return ceph_set_encryption_policy(file, arg);
+
+       case FS_IOC_GET_ENCRYPTION_POLICY:
+               ret = vet_mds_for_fscrypt(file);
+               if (ret)
+                       return ret;
+               return fscrypt_ioctl_get_policy(file, (void __user *)arg);
+
+       case FS_IOC_GET_ENCRYPTION_POLICY_EX:
+               ret = vet_mds_for_fscrypt(file);
+               if (ret)
+                       return ret;
+               return fscrypt_ioctl_get_policy_ex(file, (void __user *)arg);
+
+       case FS_IOC_ADD_ENCRYPTION_KEY:
+               ret = vet_mds_for_fscrypt(file);
+               if (ret)
+                       return ret;
+               return fscrypt_ioctl_add_key(file, (void __user *)arg);
+
+       case FS_IOC_REMOVE_ENCRYPTION_KEY:
+               return fscrypt_ioctl_remove_key(file, (void __user *)arg);
+
+       case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
+               return fscrypt_ioctl_remove_key_all_users(file,
+                                                         (void __user *)arg);
+
+       case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
+               return fscrypt_ioctl_get_key_status(file, (void __user *)arg);
+
+       case FS_IOC_GET_ENCRYPTION_NONCE:
+               ret = vet_mds_for_fscrypt(file);
+               if (ret)
+                       return ret;
+               return fscrypt_ioctl_get_nonce(file, (void __user *)arg);
        }
 
        return -ENOTTY;
index 5fb367b..615db14 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "crypto.h"
 
 #include <linux/ceph/ceph_features.h>
 #include <linux/ceph/messenger.h>
@@ -184,8 +185,54 @@ static int parse_reply_info_in(void **p, void *end,
                        info->rsnaps = 0;
                }
 
+               if (struct_v >= 5) {
+                       u32 alen;
+
+                       ceph_decode_32_safe(p, end, alen, bad);
+
+                       while (alen--) {
+                               u32 len;
+
+                               /* key */
+                               ceph_decode_32_safe(p, end, len, bad);
+                               ceph_decode_skip_n(p, end, len, bad);
+                               /* value */
+                               ceph_decode_32_safe(p, end, len, bad);
+                               ceph_decode_skip_n(p, end, len, bad);
+                       }
+               }
+
+               /* fscrypt flag -- ignore */
+               if (struct_v >= 6)
+                       ceph_decode_skip_8(p, end, bad);
+
+               info->fscrypt_auth = NULL;
+               info->fscrypt_auth_len = 0;
+               info->fscrypt_file = NULL;
+               info->fscrypt_file_len = 0;
+               if (struct_v >= 7) {
+                       ceph_decode_32_safe(p, end, info->fscrypt_auth_len, bad);
+                       if (info->fscrypt_auth_len) {
+                               info->fscrypt_auth = kmalloc(info->fscrypt_auth_len,
+                                                            GFP_KERNEL);
+                               if (!info->fscrypt_auth)
+                                       return -ENOMEM;
+                               ceph_decode_copy_safe(p, end, info->fscrypt_auth,
+                                                     info->fscrypt_auth_len, bad);
+                       }
+                       ceph_decode_32_safe(p, end, info->fscrypt_file_len, bad);
+                       if (info->fscrypt_file_len) {
+                               info->fscrypt_file = kmalloc(info->fscrypt_file_len,
+                                                            GFP_KERNEL);
+                               if (!info->fscrypt_file)
+                                       return -ENOMEM;
+                               ceph_decode_copy_safe(p, end, info->fscrypt_file,
+                                                     info->fscrypt_file_len, bad);
+                       }
+               }
                *p = end;
        } else {
+               /* legacy (unversioned) struct */
                if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
                        ceph_decode_64_safe(p, end, info->inline_version, bad);
                        ceph_decode_32_safe(p, end, info->inline_len, bad);
@@ -263,27 +310,47 @@ bad:
 
 static int parse_reply_info_lease(void **p, void *end,
                                  struct ceph_mds_reply_lease **lease,
-                                 u64 features)
+                                 u64 features, u32 *altname_len, u8 **altname)
 {
+       u8 struct_v;
+       u32 struct_len;
+       void *lend;
+
        if (features == (u64)-1) {
-               u8 struct_v, struct_compat;
-               u32 struct_len;
+               u8 struct_compat;
+
                ceph_decode_8_safe(p, end, struct_v, bad);
                ceph_decode_8_safe(p, end, struct_compat, bad);
+
                /* struct_v is expected to be >= 1. we only understand
                 * encoding whose struct_compat == 1. */
                if (!struct_v || struct_compat != 1)
                        goto bad;
+
                ceph_decode_32_safe(p, end, struct_len, bad);
-               ceph_decode_need(p, end, struct_len, bad);
-               end = *p + struct_len;
+       } else {
+               struct_len = sizeof(**lease);
+               *altname_len = 0;
+               *altname = NULL;
        }
 
-       ceph_decode_need(p, end, sizeof(**lease), bad);
+       lend = *p + struct_len;
+       ceph_decode_need(p, end, struct_len, bad);
        *lease = *p;
        *p += sizeof(**lease);
-       if (features == (u64)-1)
-               *p = end;
+
+       if (features == (u64)-1) {
+               if (struct_v >= 2) {
+                       ceph_decode_32_safe(p, end, *altname_len, bad);
+                       ceph_decode_need(p, end, *altname_len, bad);
+                       *altname = *p;
+                       *p += *altname_len;
+               } else {
+                       *altname = NULL;
+                       *altname_len = 0;
+               }
+       }
+       *p = lend;
        return 0;
 bad:
        return -EIO;
@@ -313,7 +380,8 @@ static int parse_reply_info_trace(void **p, void *end,
                info->dname = *p;
                *p += info->dname_len;
 
-               err = parse_reply_info_lease(p, end, &info->dlease, features);
+               err = parse_reply_info_lease(p, end, &info->dlease, features,
+                                            &info->altname_len, &info->altname);
                if (err < 0)
                        goto out_bad;
        }
@@ -339,9 +407,10 @@ out_bad:
  * parse readdir results
  */
 static int parse_reply_info_readdir(void **p, void *end,
-                               struct ceph_mds_reply_info_parsed *info,
-                               u64 features)
+                                   struct ceph_mds_request *req,
+                                   u64 features)
 {
+       struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
        u32 num, i = 0;
        int err;
 
@@ -371,18 +440,87 @@ static int parse_reply_info_readdir(void **p, void *end,
 
        info->dir_nr = num;
        while (num) {
+               struct inode *inode = d_inode(req->r_dentry);
+               struct ceph_inode_info *ci = ceph_inode(inode);
                struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i;
+               struct fscrypt_str tname = FSTR_INIT(NULL, 0);
+               struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+               struct ceph_fname fname;
+               u32 altname_len, _name_len;
+               u8 *altname, *_name;
+
                /* dentry */
-               ceph_decode_32_safe(p, end, rde->name_len, bad);
-               ceph_decode_need(p, end, rde->name_len, bad);
-               rde->name = *p;
-               *p += rde->name_len;
-               dout("parsed dir dname '%.*s'\n", rde->name_len, rde->name);
+               ceph_decode_32_safe(p, end, _name_len, bad);
+               ceph_decode_need(p, end, _name_len, bad);
+               _name = *p;
+               *p += _name_len;
+               dout("parsed dir dname '%.*s'\n", _name_len, _name);
+
+               if (info->hash_order)
+                       rde->raw_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
+                                                     _name, _name_len);
 
                /* dentry lease */
-               err = parse_reply_info_lease(p, end, &rde->lease, features);
+               err = parse_reply_info_lease(p, end, &rde->lease, features,
+                                            &altname_len, &altname);
                if (err)
                        goto out_bad;
+
+               /*
+                * Try to decrypt the dentry names and update them
+                * in the ceph_mds_reply_dir_entry struct.
+                */
+               fname.dir = inode;
+               fname.name = _name;
+               fname.name_len = _name_len;
+               fname.ctext = altname;
+               fname.ctext_len = altname_len;
+               /*
+                * The _name_len may be larger than altname_len, such as
+                * when the human readable name length is in range of
+                * (CEPH_NOHASH_NAME_MAX, CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE),
+                * then the copy in ceph_fname_to_usr will corrupt the
+                * data if there is no encryption key.
+                *
+                * Just set the no_copy flag and then if there is no
+                * encryption key the oname.name will always be assigned
+                * to _name.
+                */
+               fname.no_copy = true;
+               if (altname_len == 0) {
+                       /*
+                        * Set tname to _name, and this will be used
+                        * to do the base64_decode in-place. It's
+                        * safe because the decoded string should
+                        * always be shorter, which is 3/4 of the
+                        * original string.
+                        */
+                       tname.name = _name;
+
+                       /*
+                        * Set oname to _name too, and this will be
+                        * used to do the decryption in-place.
+                        */
+                       oname.name = _name;
+                       oname.len = _name_len;
+               } else {
+                       /*
+                        * This will do the decryption only in-place
+                        * from altname cryptext directly.
+                        */
+                       oname.name = altname;
+                       oname.len = altname_len;
+               }
+               rde->is_nokey = false;
+               err = ceph_fname_to_usr(&fname, &tname, &oname, &rde->is_nokey);
+               if (err) {
+                       pr_err("%s unable to decode %.*s, got %d\n", __func__,
+                              _name_len, _name, err);
+                       goto out_bad;
+               }
+               rde->name = oname.name;
+               rde->name_len = oname.len;
+
                /* inode */
                err = parse_reply_info_in(p, end, &rde->inode, features);
                if (err < 0)
@@ -581,15 +719,16 @@ bad:
  * parse extra results
  */
 static int parse_reply_info_extra(void **p, void *end,
-                                 struct ceph_mds_reply_info_parsed *info,
+                                 struct ceph_mds_request *req,
                                  u64 features, struct ceph_mds_session *s)
 {
+       struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
        u32 op = le32_to_cpu(info->head->op);
 
        if (op == CEPH_MDS_OP_GETFILELOCK)
                return parse_reply_info_filelock(p, end, info, features);
        else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
-               return parse_reply_info_readdir(p, end, info, features);
+               return parse_reply_info_readdir(p, end, req, features);
        else if (op == CEPH_MDS_OP_CREATE)
                return parse_reply_info_create(p, end, info, features, s);
        else if (op == CEPH_MDS_OP_GETVXATTR)
@@ -602,9 +741,9 @@ static int parse_reply_info_extra(void **p, void *end,
  * parse entire mds reply
  */
 static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
-                           struct ceph_mds_reply_info_parsed *info,
-                           u64 features)
+                           struct ceph_mds_request *req, u64 features)
 {
+       struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
        void *p, *end;
        u32 len;
        int err;
@@ -626,7 +765,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
        ceph_decode_32_safe(&p, end, len, bad);
        if (len > 0) {
                ceph_decode_need(&p, end, len, bad);
-               err = parse_reply_info_extra(&p, p+len, info, features, s);
+               err = parse_reply_info_extra(&p, p+len, req, features, s);
                if (err < 0)
                        goto out_bad;
        }
@@ -651,8 +790,21 @@ out_bad:
 
 static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
 {
+       int i;
+
+       kfree(info->diri.fscrypt_auth);
+       kfree(info->diri.fscrypt_file);
+       kfree(info->targeti.fscrypt_auth);
+       kfree(info->targeti.fscrypt_file);
        if (!info->dir_entries)
                return;
+
+       for (i = 0; i < info->dir_nr; i++) {
+               struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i;
+
+               kfree(rde->inode.fscrypt_auth);
+               kfree(rde->inode.fscrypt_file);
+       }
        free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
 }
 
@@ -945,6 +1097,7 @@ void ceph_mdsc_release_request(struct kref *kref)
                iput(req->r_parent);
        }
        iput(req->r_target_inode);
+       iput(req->r_new_inode);
        if (req->r_dentry)
                dput(req->r_dentry);
        if (req->r_old_dentry)
@@ -965,6 +1118,8 @@ void ceph_mdsc_release_request(struct kref *kref)
        put_cred(req->r_cred);
        if (req->r_pagelist)
                ceph_pagelist_release(req->r_pagelist);
+       kfree(req->r_fscrypt_auth);
+       kfree(req->r_altname);
        put_request_session(req);
        ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
        WARN_ON_ONCE(!list_empty(&req->r_wait));
@@ -2373,20 +2528,90 @@ static inline  u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
        return mdsc->oldest_tid;
 }
 
-/*
- * Build a dentry's path.  Allocate on heap; caller must kfree.  Based
- * on build_path_from_dentry in fs/cifs/dir.c.
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
+{
+       struct inode *dir = req->r_parent;
+       struct dentry *dentry = req->r_dentry;
+       u8 *cryptbuf = NULL;
+       u32 len = 0;
+       int ret = 0;
+
+       /* only encode if we have parent and dentry */
+       if (!dir || !dentry)
+               goto success;
+
+       /* No-op unless this is encrypted */
+       if (!IS_ENCRYPTED(dir))
+               goto success;
+
+       ret = ceph_fscrypt_prepare_readdir(dir);
+       if (ret < 0)
+               return ERR_PTR(ret);
+
+       /* No key? Just ignore it. */
+       if (!fscrypt_has_encryption_key(dir))
+               goto success;
+
+       if (!fscrypt_fname_encrypted_size(dir, dentry->d_name.len, NAME_MAX,
+                                         &len)) {
+               WARN_ON_ONCE(1);
+               return ERR_PTR(-ENAMETOOLONG);
+       }
+
+       /* No need to append altname if name is short enough */
+       if (len <= CEPH_NOHASH_NAME_MAX) {
+               len = 0;
+               goto success;
+       }
+
+       cryptbuf = kmalloc(len, GFP_KERNEL);
+       if (!cryptbuf)
+               return ERR_PTR(-ENOMEM);
+
+       ret = fscrypt_fname_encrypt(dir, &dentry->d_name, cryptbuf, len);
+       if (ret) {
+               kfree(cryptbuf);
+               return ERR_PTR(ret);
+       }
+success:
+       *plen = len;
+       return cryptbuf;
+}
+#else
+static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
+{
+       *plen = 0;
+       return NULL;
+}
+#endif
+
+/**
+ * ceph_mdsc_build_path - build a path string to a given dentry
+ * @dentry: dentry to which path should be built
+ * @plen: returned length of string
+ * @pbase: returned base inode number
+ * @for_wire: is this path going to be sent to the MDS?
+ *
+ * Build a string that represents the path to the dentry. This is mostly called
+ * for two different purposes:
+ *
+ * 1) we need to build a path string to send to the MDS (for_wire == true)
+ * 2) we need a path string for local presentation (e.g. debugfs)
+ *    (for_wire == false)
  *
- * If @stop_on_nosnap, generate path relative to the first non-snapped
- * inode.
+ * The path is built in reverse, starting with the dentry. Walk back up toward
+ * the root, building the path until the first non-snapped inode is reached
+ * (for_wire) or the root inode is reached (!for_wire).
  *
  * Encode hidden .snap dirs as a double /, i.e.
  *   foo/.snap/bar -> foo//bar
  */
 char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
-                          int stop_on_nosnap)
+                          int for_wire)
 {
-       struct dentry *temp;
+       struct dentry *cur;
+       struct inode *inode;
        char *path;
        int pos;
        unsigned seq;
@@ -2403,34 +2628,72 @@ retry:
        path[pos] = '\0';
 
        seq = read_seqbegin(&rename_lock);
-       rcu_read_lock();
-       temp = dentry;
+       cur = dget(dentry);
        for (;;) {
-               struct inode *inode;
+               struct dentry *parent;
 
-               spin_lock(&temp->d_lock);
-               inode = d_inode(temp);
+               spin_lock(&cur->d_lock);
+               inode = d_inode(cur);
                if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
                        dout("build_path path+%d: %p SNAPDIR\n",
-                            pos, temp);
-               } else if (stop_on_nosnap && inode && dentry != temp &&
+                            pos, cur);
+                       spin_unlock(&cur->d_lock);
+                       parent = dget_parent(cur);
+               } else if (for_wire && inode && dentry != cur &&
                           ceph_snap(inode) == CEPH_NOSNAP) {
-                       spin_unlock(&temp->d_lock);
+                       spin_unlock(&cur->d_lock);
                        pos++; /* get rid of any prepended '/' */
                        break;
+               } else if (!for_wire || !IS_ENCRYPTED(d_inode(cur->d_parent))) {
+                       pos -= cur->d_name.len;
+                       if (pos < 0) {
+                               spin_unlock(&cur->d_lock);
+                               break;
+                       }
+                       memcpy(path + pos, cur->d_name.name, cur->d_name.len);
+                       spin_unlock(&cur->d_lock);
+                       parent = dget_parent(cur);
                } else {
-                       pos -= temp->d_name.len;
+                       int len, ret;
+                       char buf[NAME_MAX];
+
+                       /*
+                        * Proactively copy name into buf, in case we need to
+                        * present it as-is.
+                        */
+                       memcpy(buf, cur->d_name.name, cur->d_name.len);
+                       len = cur->d_name.len;
+                       spin_unlock(&cur->d_lock);
+                       parent = dget_parent(cur);
+
+                       ret = ceph_fscrypt_prepare_readdir(d_inode(parent));
+                       if (ret < 0) {
+                               dput(parent);
+                               dput(cur);
+                               return ERR_PTR(ret);
+                       }
+
+                       if (fscrypt_has_encryption_key(d_inode(parent))) {
+                               len = ceph_encode_encrypted_fname(d_inode(parent),
+                                                                 cur, buf);
+                               if (len < 0) {
+                                       dput(parent);
+                                       dput(cur);
+                                       return ERR_PTR(len);
+                               }
+                       }
+                       pos -= len;
                        if (pos < 0) {
-                               spin_unlock(&temp->d_lock);
+                               dput(parent);
                                break;
                        }
-                       memcpy(path + pos, temp->d_name.name, temp->d_name.len);
+                       memcpy(path + pos, buf, len);
                }
-               spin_unlock(&temp->d_lock);
-               temp = READ_ONCE(temp->d_parent);
+               dput(cur);
+               cur = parent;
 
                /* Are we at the root? */
-               if (IS_ROOT(temp))
+               if (IS_ROOT(cur))
                        break;
 
                /* Are we out of buffer? */
@@ -2439,8 +2702,9 @@ retry:
 
                path[pos] = '/';
        }
-       base = ceph_ino(d_inode(temp));
-       rcu_read_unlock();
+       inode = d_inode(cur);
+       base = inode ? ceph_ino(inode) : 0;
+       dput(cur);
 
        if (read_seqretry(&rename_lock, seq))
                goto retry;
@@ -2450,8 +2714,8 @@ retry:
                 * A rename didn't occur, but somehow we didn't end up where
                 * we thought we would. Throw a warning and try again.
                 */
-               pr_warn("build_path did not end path lookup where "
-                       "expected, pos is %d\n", pos);
+               pr_warn("build_path did not end path lookup where expected (pos = %d)\n",
+                       pos);
                goto retry;
        }
 
@@ -2471,7 +2735,8 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
        rcu_read_lock();
        if (!dir)
                dir = d_inode_rcu(dentry->d_parent);
-       if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) {
+       if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP &&
+           !IS_ENCRYPTED(dir)) {
                *pino = ceph_ino(dir);
                rcu_read_unlock();
                *ppath = dentry->d_name.name;
@@ -2539,8 +2804,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
        return r;
 }
 
-static void encode_timestamp_and_gids(void **p,
-                                     const struct ceph_mds_request *req)
+static void encode_mclientrequest_tail(void **p,
+                                      const struct ceph_mds_request *req)
 {
        struct ceph_timespec ts;
        int i;
@@ -2548,11 +2813,43 @@ static void encode_timestamp_and_gids(void **p,
        ceph_encode_timespec64(&ts, &req->r_stamp);
        ceph_encode_copy(p, &ts, sizeof(ts));
 
-       /* gid_list */
+       /* v4: gid_list */
        ceph_encode_32(p, req->r_cred->group_info->ngroups);
        for (i = 0; i < req->r_cred->group_info->ngroups; i++)
                ceph_encode_64(p, from_kgid(&init_user_ns,
                                            req->r_cred->group_info->gid[i]));
+
+       /* v5: altname */
+       ceph_encode_32(p, req->r_altname_len);
+       ceph_encode_copy(p, req->r_altname, req->r_altname_len);
+
+       /* v6: fscrypt_auth and fscrypt_file */
+       if (req->r_fscrypt_auth) {
+               u32 authlen = ceph_fscrypt_auth_len(req->r_fscrypt_auth);
+
+               ceph_encode_32(p, authlen);
+               ceph_encode_copy(p, req->r_fscrypt_auth, authlen);
+       } else {
+               ceph_encode_32(p, 0);
+       }
+       if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags)) {
+               ceph_encode_32(p, sizeof(__le64));
+               ceph_encode_64(p, req->r_fscrypt_file);
+       } else {
+               ceph_encode_32(p, 0);
+       }
+}
+
+static struct ceph_mds_request_head_legacy *
+find_legacy_request_head(void *p, u64 features)
+{
+       bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
+       struct ceph_mds_request_head_old *ohead;
+
+       if (legacy)
+               return (struct ceph_mds_request_head_legacy *)p;
+       ohead = (struct ceph_mds_request_head_old *)p;
+       return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid;
 }
 
 /*
@@ -2565,7 +2862,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
        int mds = session->s_mds;
        struct ceph_mds_client *mdsc = session->s_mdsc;
        struct ceph_msg *msg;
-       struct ceph_mds_request_head_old *head;
+       struct ceph_mds_request_head_legacy *lhead;
        const char *path1 = NULL;
        const char *path2 = NULL;
        u64 ino1 = 0, ino2 = 0;
@@ -2577,6 +2874,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
        void *p, *end;
        int ret;
        bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
+       bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
+                                    &session->s_features);
 
        ret = set_request_path_attr(req->r_inode, req->r_dentry,
                              req->r_parent, req->r_path1, req->r_ino1.ino,
@@ -2601,12 +2900,32 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
                goto out_free1;
        }
 
-       len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head);
-       len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
-               sizeof(struct ceph_timespec);
-       len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups);
+       req->r_altname = get_fscrypt_altname(req, &req->r_altname_len);
+       if (IS_ERR(req->r_altname)) {
+               msg = ERR_CAST(req->r_altname);
+               req->r_altname = NULL;
+               goto out_free2;
+       }
+
+       /*
+        * Old ceph versions without support for the 32bit retry/fwd
+        * feature copy the raw memory directly when decoding the
+        * requests, while new versions decode the head depending on
+        * the version member, so we need to make sure it will be
+        * compatible with them both.
+        */
+       if (legacy)
+               len = sizeof(struct ceph_mds_request_head_legacy);
+       else if (old_version)
+               len = sizeof(struct ceph_mds_request_head_old);
+       else
+               len = sizeof(struct ceph_mds_request_head);
 
-       /* calculate (max) length for cap releases */
+       /* filepaths */
+       len += 2 * (1 + sizeof(u32) + sizeof(u64));
+       len += pathlen1 + pathlen2;
+
+       /* cap releases */
        len += sizeof(struct ceph_mds_request_release) *
                (!!req->r_inode_drop + !!req->r_dentry_drop +
                 !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
@@ -2616,6 +2935,27 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
        if (req->r_old_dentry_drop)
                len += pathlen2;
 
+       /* MClientRequest tail */
+
+       /* req->r_stamp */
+       len += sizeof(struct ceph_timespec);
+
+       /* gid list */
+       len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups);
+
+       /* alternate name */
+       len += sizeof(u32) + req->r_altname_len;
+
+       /* fscrypt_auth */
+       len += sizeof(u32); // fscrypt_auth
+       if (req->r_fscrypt_auth)
+               len += ceph_fscrypt_auth_len(req->r_fscrypt_auth);
+
+       /* fscrypt_file */
+       len += sizeof(u32);
+       if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags))
+               len += sizeof(__le64);
+
        msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
        if (!msg) {
                msg = ERR_PTR(-ENOMEM);
@@ -2624,33 +2964,40 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 
        msg->hdr.tid = cpu_to_le64(req->r_tid);
 
+       lhead = find_legacy_request_head(msg->front.iov_base,
+                                        session->s_con.peer_features);
+
        /*
-        * The old ceph_mds_request_head didn't contain a version field, and
+        * The ceph_mds_request_head_legacy didn't contain a version field, and
         * one was added when we moved the message version from 3->4.
         */
        if (legacy) {
                msg->hdr.version = cpu_to_le16(3);
-               head = msg->front.iov_base;
-               p = msg->front.iov_base + sizeof(*head);
-       } else {
-               struct ceph_mds_request_head *new_head = msg->front.iov_base;
+               p = msg->front.iov_base + sizeof(*lhead);
+       } else if (old_version) {
+               struct ceph_mds_request_head_old *ohead = msg->front.iov_base;
 
                msg->hdr.version = cpu_to_le16(4);
-               new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
-               head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
-               p = msg->front.iov_base + sizeof(*new_head);
+               ohead->version = cpu_to_le16(1);
+               p = msg->front.iov_base + sizeof(*ohead);
+       } else {
+               struct ceph_mds_request_head *nhead = msg->front.iov_base;
+
+               msg->hdr.version = cpu_to_le16(6);
+               nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
+               p = msg->front.iov_base + sizeof(*nhead);
        }
 
        end = msg->front.iov_base + msg->front.iov_len;
 
-       head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
-       head->op = cpu_to_le32(req->r_op);
-       head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
-                                                req->r_cred->fsuid));
-       head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
-                                                req->r_cred->fsgid));
-       head->ino = cpu_to_le64(req->r_deleg_ino);
-       head->args = req->r_args;
+       lhead->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
+       lhead->op = cpu_to_le32(req->r_op);
+       lhead->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
+                                                 req->r_cred->fsuid));
+       lhead->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
+                                                 req->r_cred->fsgid));
+       lhead->ino = cpu_to_le64(req->r_deleg_ino);
+       lhead->args = req->r_args;
 
        ceph_encode_filepath(&p, end, ino1, path1);
        ceph_encode_filepath(&p, end, ino2, path2);
@@ -2665,15 +3012,23 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
                      req->r_inode ? req->r_inode : d_inode(req->r_dentry),
                      mds, req->r_inode_drop, req->r_inode_unless,
                      req->r_op == CEPH_MDS_OP_READDIR);
-       if (req->r_dentry_drop)
-               releases += ceph_encode_dentry_release(&p, req->r_dentry,
+       if (req->r_dentry_drop) {
+               ret = ceph_encode_dentry_release(&p, req->r_dentry,
                                req->r_parent, mds, req->r_dentry_drop,
                                req->r_dentry_unless);
-       if (req->r_old_dentry_drop)
-               releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
+               if (ret < 0)
+                       goto out_err;
+               releases += ret;
+       }
+       if (req->r_old_dentry_drop) {
+               ret = ceph_encode_dentry_release(&p, req->r_old_dentry,
                                req->r_old_dentry_dir, mds,
                                req->r_old_dentry_drop,
                                req->r_old_dentry_unless);
+               if (ret < 0)
+                       goto out_err;
+               releases += ret;
+       }
        if (req->r_old_inode_drop)
                releases += ceph_encode_inode_release(&p,
                      d_inode(req->r_old_dentry),
@@ -2684,9 +3039,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
                p = msg->front.iov_base + req->r_request_release_offset;
        }
 
-       head->num_releases = cpu_to_le16(releases);
+       lhead->num_releases = cpu_to_le16(releases);
 
-       encode_timestamp_and_gids(&p, req);
+       encode_mclientrequest_tail(&p, req);
 
        if (WARN_ON_ONCE(p > end)) {
                ceph_msg_put(msg);
@@ -2715,6 +3070,10 @@ out_free1:
                ceph_mdsc_free_path((char *)path1, pathlen1);
 out:
        return msg;
+out_err:
+       ceph_msg_put(msg);
+       msg = ERR_PTR(ret);
+       goto out_free2;
 }
 
 /*
@@ -2731,18 +3090,6 @@ static void complete_request(struct ceph_mds_client *mdsc,
        complete_all(&req->r_completion);
 }
 
-static struct ceph_mds_request_head_old *
-find_old_request_head(void *p, u64 features)
-{
-       bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
-       struct ceph_mds_request_head *new_head;
-
-       if (legacy)
-               return (struct ceph_mds_request_head_old *)p;
-       new_head = (struct ceph_mds_request_head *)p;
-       return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
-}
-
 /*
  * called under mdsc->mutex
  */
@@ -2752,29 +3099,28 @@ static int __prepare_send_request(struct ceph_mds_session *session,
 {
        int mds = session->s_mds;
        struct ceph_mds_client *mdsc = session->s_mdsc;
-       struct ceph_mds_request_head_old *rhead;
+       struct ceph_mds_request_head_legacy *lhead;
+       struct ceph_mds_request_head *nhead;
        struct ceph_msg *msg;
-       int flags = 0, max_retry;
+       int flags = 0, old_max_retry;
+       bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
+                                    &session->s_features);
 
        /*
-        * The type of 'r_attempts' in kernel 'ceph_mds_request'
-        * is 'int', while in 'ceph_mds_request_head' the type of
-        * 'num_retry' is '__u8'. So in case the request retries
-        *  exceeding 256 times, the MDS will receive a incorrect
-        *  retry seq.
-        *
-        * In this case it's ususally a bug in MDS and continue
-        * retrying the request makes no sense.
-        *
-        * In future this could be fixed in ceph code, so avoid
-        * using the hardcode here.
+        * Avoid infinite retrying after overflow. The client will
+        * increase the retry count, and if the MDS is an old version
+        * we limit it to retrying at most 256 times.
         */
-       max_retry = sizeof_field(struct ceph_mds_request_head, num_retry);
-       max_retry = 1 << (max_retry * BITS_PER_BYTE);
-       if (req->r_attempts >= max_retry) {
-               pr_warn_ratelimited("%s request tid %llu seq overflow\n",
-                                   __func__, req->r_tid);
-               return -EMULTIHOP;
+       if (req->r_attempts) {
+              old_max_retry = sizeof_field(struct ceph_mds_request_head_old,
+                                           num_retry);
+              old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE);
+              if ((old_version && req->r_attempts >= old_max_retry) ||
+                  ((uint32_t)req->r_attempts >= U32_MAX)) {
+                       pr_warn_ratelimited("%s request tid %llu seq overflow\n",
+                                           __func__, req->r_tid);
+                       return -EMULTIHOP;
+              }
        }
 
        req->r_attempts++;
@@ -2800,23 +3146,27 @@ static int __prepare_send_request(struct ceph_mds_session *session,
                 * d_move mangles the src name.
                 */
                msg = req->r_request;
-               rhead = find_old_request_head(msg->front.iov_base,
-                                             session->s_con.peer_features);
+               lhead = find_legacy_request_head(msg->front.iov_base,
+                                                session->s_con.peer_features);
 
-               flags = le32_to_cpu(rhead->flags);
+               flags = le32_to_cpu(lhead->flags);
                flags |= CEPH_MDS_FLAG_REPLAY;
-               rhead->flags = cpu_to_le32(flags);
+               lhead->flags = cpu_to_le32(flags);
 
                if (req->r_target_inode)
-                       rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+                       lhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
 
-               rhead->num_retry = req->r_attempts - 1;
+               lhead->num_retry = req->r_attempts - 1;
+               if (!old_version) {
+                       nhead = (struct ceph_mds_request_head*)msg->front.iov_base;
+                       nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1);
+               }
 
                /* remove cap/dentry releases from message */
-               rhead->num_releases = 0;
+               lhead->num_releases = 0;
 
                p = msg->front.iov_base + req->r_request_release_offset;
-               encode_timestamp_and_gids(&p, req);
+               encode_mclientrequest_tail(&p, req);
 
                msg->front.iov_len = p - msg->front.iov_base;
                msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
@@ -2834,18 +3184,23 @@ static int __prepare_send_request(struct ceph_mds_session *session,
        }
        req->r_request = msg;
 
-       rhead = find_old_request_head(msg->front.iov_base,
-                                     session->s_con.peer_features);
-       rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
+       lhead = find_legacy_request_head(msg->front.iov_base,
+                                        session->s_con.peer_features);
+       lhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
        if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
                flags |= CEPH_MDS_FLAG_REPLAY;
        if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags))
                flags |= CEPH_MDS_FLAG_ASYNC;
        if (req->r_parent)
                flags |= CEPH_MDS_FLAG_WANT_DENTRY;
-       rhead->flags = cpu_to_le32(flags);
-       rhead->num_fwd = req->r_num_fwd;
-       rhead->num_retry = req->r_attempts - 1;
+       lhead->flags = cpu_to_le32(flags);
+       lhead->num_fwd = req->r_num_fwd;
+       lhead->num_retry = req->r_attempts - 1;
+       if (!old_version) {
+               nhead = (struct ceph_mds_request_head*)msg->front.iov_base;
+               nhead->ext_num_fwd = cpu_to_le32(req->r_num_fwd);
+               nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1);
+       }
 
        dout(" r_parent = %p\n", req->r_parent);
        return 0;
@@ -3348,22 +3703,35 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        }
 
        dout("handle_reply tid %lld result %d\n", tid, result);
-       rinfo = &req->r_reply_info;
        if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features))
-               err = parse_reply_info(session, msg, rinfo, (u64)-1);
+               err = parse_reply_info(session, msg, req, (u64)-1);
        else
-               err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
+               err = parse_reply_info(session, msg, req,
+                                      session->s_con.peer_features);
        mutex_unlock(&mdsc->mutex);
 
        /* Must find target inode outside of mutexes to avoid deadlocks */
+       rinfo = &req->r_reply_info;
        if ((err >= 0) && rinfo->head->is_target) {
-               struct inode *in;
+               struct inode *in = xchg(&req->r_new_inode, NULL);
                struct ceph_vino tvino = {
                        .ino  = le64_to_cpu(rinfo->targeti.in->ino),
                        .snap = le64_to_cpu(rinfo->targeti.in->snapid)
                };
 
-               in = ceph_get_inode(mdsc->fsc->sb, tvino);
+               /*
+                * If we ended up opening an existing inode, discard
+                * r_new_inode
+                */
+               if (req->r_op == CEPH_MDS_OP_CREATE &&
+                   !req->r_reply_info.has_create_ino) {
+                       /* This should never happen on an async create */
+                       WARN_ON_ONCE(req->r_deleg_ino);
+                       iput(in);
+                       in = NULL;
+               }
+
+               in = ceph_get_inode(mdsc->fsc->sb, tvino, in);
                if (IS_ERR(in)) {
                        err = PTR_ERR(in);
                        mutex_lock(&session->s_mutex);
@@ -3406,7 +3774,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        if (err == 0) {
                if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
                                    req->r_op == CEPH_MDS_OP_LSSNAP))
-                       ceph_readdir_prepopulate(req, req->r_session);
+                       err = ceph_readdir_prepopulate(req, req->r_session);
        }
        current->journal_info = NULL;
        mutex_unlock(&req->r_fill_mutex);
@@ -3491,33 +3859,21 @@ static void handle_forward(struct ceph_mds_client *mdsc,
        if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
                dout("forward tid %llu aborted, unregistering\n", tid);
                __unregister_request(mdsc, req);
-       } else if (fwd_seq <= req->r_num_fwd) {
+       } else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) {
                /*
-                * The type of 'num_fwd' in ceph 'MClientRequestForward'
-                * is 'int32_t', while in 'ceph_mds_request_head' the
-                * type is '__u8'. So in case the request bounces between
-                * MDSes exceeding 256 times, the client will get stuck.
-                *
-                * In this case it's ususally a bug in MDS and continue
-                * bouncing the request makes no sense.
+                * Avoid infinite retrying after overflow.
                 *
-                * In future this could be fixed in ceph code, so avoid
-                * using the hardcode here.
+                * The MDS increases the fwd count on every forward, so if
+                * the num_fwd seen on the client side is not greater than
+                * the one saved in the request, the MDS is an old version
+                * whose 8-bit counter has overflowed.
                 */
-               int max = sizeof_field(struct ceph_mds_request_head, num_fwd);
-               max = 1 << (max * BITS_PER_BYTE);
-               if (req->r_num_fwd >= max) {
-                       mutex_lock(&req->r_fill_mutex);
-                       req->r_err = -EMULTIHOP;
-                       set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
-                       mutex_unlock(&req->r_fill_mutex);
-                       aborted = true;
-                       pr_warn_ratelimited("forward tid %llu seq overflow\n",
-                                           tid);
-               } else {
-                       dout("forward tid %llu to mds%d - old seq %d <= %d\n",
-                            tid, next_mds, req->r_num_fwd, fwd_seq);
-               }
+               mutex_lock(&req->r_fill_mutex);
+               req->r_err = -EMULTIHOP;
+               set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+               mutex_unlock(&req->r_fill_mutex);
+               aborted = true;
+               pr_warn_ratelimited("forward tid %llu seq overflow\n", tid);
        } else {
                /* resend. forward race not possible; mds would drop */
                dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
@@ -4550,6 +4906,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 
        dout("handle_lease from mds%d\n", mds);
 
+       if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+               return;
+
        /* decode */
        if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
                goto bad;
@@ -4568,8 +4927,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
             dname.len, dname.name);
 
        mutex_lock(&session->s_mutex);
-       inc_session_sequence(session);
-
        if (!inode) {
                dout("handle_lease no inode %llx\n", vino.ino);
                goto release;
@@ -4631,9 +4988,13 @@ release:
 out:
        mutex_unlock(&session->s_mutex);
        iput(inode);
+
+       ceph_dec_mds_stopping_blocker(mdsc);
        return;
 
 bad:
+       ceph_dec_mds_stopping_blocker(mdsc);
+
        pr_err("corrupt lease message\n");
        ceph_msg_dump(msg);
 }
@@ -4829,6 +5190,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        }
 
        init_completion(&mdsc->safe_umount_waiters);
+       spin_lock_init(&mdsc->stopping_lock);
+       atomic_set(&mdsc->stopping_blockers, 0);
+       init_completion(&mdsc->stopping_waiter);
        init_waitqueue_head(&mdsc->session_close_wq);
        INIT_LIST_HEAD(&mdsc->waiting_for_map);
        mdsc->quotarealms_inodes = RB_ROOT;
index 86d2965..5a3714b 100644 (file)
@@ -32,8 +32,9 @@ enum ceph_feature_type {
        CEPHFS_FEATURE_ALTERNATE_NAME,
        CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
        CEPHFS_FEATURE_OP_GETVXATTR,
+       CEPHFS_FEATURE_32BITS_RETRY_FWD,
 
-       CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_OP_GETVXATTR,
+       CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_32BITS_RETRY_FWD,
 };
 
 #define CEPHFS_FEATURES_CLIENT_SUPPORTED {     \
@@ -44,8 +45,10 @@ enum ceph_feature_type {
        CEPHFS_FEATURE_MULTI_RECONNECT,         \
        CEPHFS_FEATURE_DELEG_INO,               \
        CEPHFS_FEATURE_METRIC_COLLECT,          \
+       CEPHFS_FEATURE_ALTERNATE_NAME,          \
        CEPHFS_FEATURE_NOTIFY_SESSION_STATE,    \
        CEPHFS_FEATURE_OP_GETVXATTR,            \
+       CEPHFS_FEATURE_32BITS_RETRY_FWD,        \
 }
 
 /*
@@ -86,13 +89,19 @@ struct ceph_mds_reply_info_in {
        s32 dir_pin;
        struct ceph_timespec btime;
        struct ceph_timespec snap_btime;
+       u8 *fscrypt_auth;
+       u8 *fscrypt_file;
+       u32 fscrypt_auth_len;
+       u32 fscrypt_file_len;
        u64 rsnaps;
        u64 change_attr;
 };
 
 struct ceph_mds_reply_dir_entry {
+       bool                          is_nokey;
        char                          *name;
        u32                           name_len;
+       u32                           raw_hash;
        struct ceph_mds_reply_lease   *lease;
        struct ceph_mds_reply_info_in inode;
        loff_t                        offset;
@@ -116,7 +125,9 @@ struct ceph_mds_reply_info_parsed {
        struct ceph_mds_reply_info_in diri, targeti;
        struct ceph_mds_reply_dirfrag *dirfrag;
        char                          *dname;
+       u8                            *altname;
        u32                           dname_len;
+       u32                           altname_len;
        struct ceph_mds_reply_lease   *dlease;
        struct ceph_mds_reply_xattr   xattr_info;
 
@@ -263,6 +274,7 @@ struct ceph_mds_request {
 
        struct inode *r_parent;             /* parent dir inode */
        struct inode *r_target_inode;       /* resulting inode */
+       struct inode *r_new_inode;          /* new inode (for creates) */
 
 #define CEPH_MDS_R_DIRECT_IS_HASH      (1) /* r_direct_hash is valid */
 #define CEPH_MDS_R_ABORTED             (2) /* call was aborted */
@@ -272,11 +284,19 @@ struct ceph_mds_request {
 #define CEPH_MDS_R_DID_PREPOPULATE     (6) /* prepopulated readdir */
 #define CEPH_MDS_R_PARENT_LOCKED       (7) /* is r_parent->i_rwsem wlocked? */
 #define CEPH_MDS_R_ASYNC               (8) /* async request */
+#define CEPH_MDS_R_FSCRYPT_FILE                (9) /* must marshal fscrypt_file field */
        unsigned long   r_req_flags;
 
        struct mutex r_fill_mutex;
 
        union ceph_mds_request_args r_args;
+
+       struct ceph_fscrypt_auth *r_fscrypt_auth;
+       u64     r_fscrypt_file;
+
+       u8 *r_altname;              /* fscrypt binary crypttext for long filenames */
+       u32 r_altname_len;          /* length of r_altname */
+
        int r_fmode;        /* file mode, if expecting cap */
        int r_request_release_offset;
        const struct cred *r_cred;
@@ -381,8 +401,9 @@ struct cap_wait {
 };
 
 enum {
-       CEPH_MDSC_STOPPING_BEGIN = 1,
-       CEPH_MDSC_STOPPING_FLUSHED = 2,
+       CEPH_MDSC_STOPPING_BEGIN = 1,
+       CEPH_MDSC_STOPPING_FLUSHING = 2,
+       CEPH_MDSC_STOPPING_FLUSHED = 3,
 };
 
 /*
@@ -401,7 +422,11 @@ struct ceph_mds_client {
        struct ceph_mds_session **sessions;    /* NULL for mds if no session */
        atomic_t                num_sessions;
        int                     max_sessions;  /* len of sessions array */
-       int                     stopping;      /* true if shutting down */
+
+       spinlock_t              stopping_lock;  /* protect stopping and stopping_blockers */
+       int                     stopping;      /* the stage of shutting down */
+       atomic_t                stopping_blockers;
+       struct completion       stopping_waiter;
 
        atomic64_t              quotarealms_count; /* # realms with quota */
        /*
@@ -557,7 +582,7 @@ static inline void ceph_mdsc_free_path(char *path, int len)
 }
 
 extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
-                                 int stop_on_nosnap);
+                                 int for_wire);
 
 extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
 extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
index 64592ad..f7fcf7f 100644 (file)
@@ -47,25 +47,23 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
        struct inode *inode;
        struct ceph_inode_info *ci;
 
+       if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+               return;
+
        if (msg->front.iov_len < sizeof(*h)) {
                pr_err("%s corrupt message mds%d len %d\n", __func__,
                       session->s_mds, (int)msg->front.iov_len);
                ceph_msg_dump(msg);
-               return;
+               goto out;
        }
 
-       /* increment msg sequence number */
-       mutex_lock(&session->s_mutex);
-       inc_session_sequence(session);
-       mutex_unlock(&session->s_mutex);
-
        /* lookup inode */
        vino.ino = le64_to_cpu(h->ino);
        vino.snap = CEPH_NOSNAP;
        inode = ceph_find_inode(sb, vino);
        if (!inode) {
                pr_warn("Failed to find inode %llu\n", vino.ino);
-               return;
+               goto out;
        }
        ci = ceph_inode(inode);
 
@@ -78,6 +76,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
        spin_unlock(&ci->i_ceph_lock);
 
        iput(inode);
+out:
+       ceph_dec_mds_stopping_blocker(mdsc);
 }
 
 static struct ceph_quotarealm_inode *
index c9920ad..813f21a 100644 (file)
@@ -1015,6 +1015,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
        int locked_rwsem = 0;
        bool close_sessions = false;
 
+       if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+               return;
+
        /* decode */
        if (msg->front.iov_len < sizeof(*h))
                goto bad;
@@ -1030,10 +1033,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
        dout("%s from mds%d op %s split %llx tracelen %d\n", __func__,
             mds, ceph_snap_op_name(op), split, trace_len);
 
-       mutex_lock(&session->s_mutex);
-       inc_session_sequence(session);
-       mutex_unlock(&session->s_mutex);
-
        down_write(&mdsc->snap_rwsem);
        locked_rwsem = 1;
 
@@ -1151,6 +1150,7 @@ skip_inode:
        up_write(&mdsc->snap_rwsem);
 
        flush_snaps(mdsc);
+       ceph_dec_mds_stopping_blocker(mdsc);
        return;
 
 bad:
@@ -1160,6 +1160,8 @@ out:
        if (locked_rwsem)
                up_write(&mdsc->snap_rwsem);
 
+       ceph_dec_mds_stopping_blocker(mdsc);
+
        if (close_sessions)
                ceph_mdsc_close_sessions(mdsc);
        return;
index a5f5201..2d7f5a8 100644 (file)
@@ -20,6 +20,7 @@
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
+#include "crypto.h"
 
 #include <linux/ceph/ceph_features.h>
 #include <linux/ceph/decode.h>
@@ -46,6 +47,7 @@ static void ceph_put_super(struct super_block *s)
        struct ceph_fs_client *fsc = ceph_sb_to_client(s);
 
        dout("put_super\n");
+       ceph_fscrypt_free_dummy_policy(fsc);
        ceph_mdsc_close_sessions(fsc->mdsc);
 }
 
@@ -151,6 +153,7 @@ enum {
        Opt_recover_session,
        Opt_source,
        Opt_mon_addr,
+       Opt_test_dummy_encryption,
        /* string args above */
        Opt_dirstat,
        Opt_rbytes,
@@ -165,6 +168,7 @@ enum {
        Opt_copyfrom,
        Opt_wsync,
        Opt_pagecache,
+       Opt_sparseread,
 };
 
 enum ceph_recover_session_mode {
@@ -192,6 +196,7 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
        fsparam_string  ("fsc",                         Opt_fscache), // fsc=...
        fsparam_flag_no ("ino32",                       Opt_ino32),
        fsparam_string  ("mds_namespace",               Opt_mds_namespace),
+       fsparam_string  ("mon_addr",                    Opt_mon_addr),
        fsparam_flag_no ("poolperm",                    Opt_poolperm),
        fsparam_flag_no ("quotadf",                     Opt_quotadf),
        fsparam_u32     ("rasize",                      Opt_rasize),
@@ -203,10 +208,12 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
        fsparam_u32     ("rsize",                       Opt_rsize),
        fsparam_string  ("snapdirname",                 Opt_snapdirname),
        fsparam_string  ("source",                      Opt_source),
-       fsparam_string  ("mon_addr",                    Opt_mon_addr),
+       fsparam_flag    ("test_dummy_encryption",       Opt_test_dummy_encryption),
+       fsparam_string  ("test_dummy_encryption",       Opt_test_dummy_encryption),
        fsparam_u32     ("wsize",                       Opt_wsize),
        fsparam_flag_no ("wsync",                       Opt_wsync),
        fsparam_flag_no ("pagecache",                   Opt_pagecache),
+       fsparam_flag_no ("sparseread",                  Opt_sparseread),
        {}
 };
 
@@ -576,6 +583,29 @@ static int ceph_parse_mount_param(struct fs_context *fc,
                else
                        fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
                break;
+       case Opt_sparseread:
+               if (result.negated)
+                       fsopt->flags &= ~CEPH_MOUNT_OPT_SPARSEREAD;
+               else
+                       fsopt->flags |= CEPH_MOUNT_OPT_SPARSEREAD;
+               break;
+       case Opt_test_dummy_encryption:
+#ifdef CONFIG_FS_ENCRYPTION
+               fscrypt_free_dummy_policy(&fsopt->dummy_enc_policy);
+               ret = fscrypt_parse_test_dummy_encryption(param,
+                                               &fsopt->dummy_enc_policy);
+               if (ret == -EINVAL) {
+                       warnfc(fc, "Value of option \"%s\" is unrecognized",
+                              param->key);
+               } else if (ret == -EEXIST) {
+                       warnfc(fc, "Conflicting test_dummy_encryption options");
+                       ret = -EINVAL;
+               }
+#else
+               warnfc(fc,
+                      "FS encryption not supported: test_dummy_encryption mount option ignored");
+#endif
+               break;
        default:
                BUG();
        }
@@ -596,6 +626,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
        kfree(args->server_path);
        kfree(args->fscache_uniq);
        kfree(args->mon_addr);
+       fscrypt_free_dummy_policy(&args->dummy_enc_policy);
        kfree(args);
 }
 
@@ -710,9 +741,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 
        if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
                seq_puts(m, ",wsync");
-
        if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
                seq_puts(m, ",nopagecache");
+       if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
+               seq_puts(m, ",sparseread");
+
+       fscrypt_show_test_dummy_encryption(m, ',', root->d_sb);
 
        if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
                seq_printf(m, ",wsize=%u", fsopt->wsize);
@@ -1052,6 +1086,50 @@ out:
        return root;
 }
 
+#ifdef CONFIG_FS_ENCRYPTION
+static int ceph_apply_test_dummy_encryption(struct super_block *sb,
+                                           struct fs_context *fc,
+                                           struct ceph_mount_options *fsopt)
+{
+       struct ceph_fs_client *fsc = sb->s_fs_info;
+
+       if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy))
+               return 0;
+
+       /* No changing encryption context on remount. */
+       if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE &&
+           !fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
+               if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
+                                                &fsc->fsc_dummy_enc_policy))
+                       return 0;
+               errorfc(fc, "Can't set test_dummy_encryption on remount");
+               return -EINVAL;
+       }
+
+       /* Also make sure fsopt doesn't contain a conflicting value. */
+       if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
+               if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
+                                                &fsc->fsc_dummy_enc_policy))
+                       return 0;
+               errorfc(fc, "Conflicting test_dummy_encryption options");
+               return -EINVAL;
+       }
+
+       fsc->fsc_dummy_enc_policy = fsopt->dummy_enc_policy;
+       memset(&fsopt->dummy_enc_policy, 0, sizeof(fsopt->dummy_enc_policy));
+
+       warnfc(fc, "test_dummy_encryption mode enabled");
+       return 0;
+}
+#else
+static int ceph_apply_test_dummy_encryption(struct super_block *sb,
+                                           struct fs_context *fc,
+                                           struct ceph_mount_options *fsopt)
+{
+       return 0;
+}
+#endif
+
 /*
  * mount: join the ceph cluster, and open root directory.
  */
@@ -1080,6 +1158,11 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
                                goto out;
                }
 
+               err = ceph_apply_test_dummy_encryption(fsc->sb, fc,
+                                                      fsc->mount_options);
+               if (err)
+                       goto out;
+
                dout("mount opening path '%s'\n", path);
 
                ceph_fs_debugfs_init(fsc);
@@ -1101,6 +1184,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 
 out:
        mutex_unlock(&fsc->client->mount_mutex);
+       ceph_fscrypt_free_dummy_policy(fsc);
        return ERR_PTR(err);
 }
 
@@ -1126,6 +1210,8 @@ static int ceph_set_super(struct super_block *s, struct fs_context *fc)
        s->s_time_max = U32_MAX;
        s->s_flags |= SB_NODIRATIME | SB_NOATIME;
 
+       ceph_fscrypt_set_ops(s);
+
        ret = set_anon_super_fc(s, fc);
        if (ret != 0)
                fsc->sb = NULL;
@@ -1287,15 +1373,26 @@ static void ceph_free_fc(struct fs_context *fc)
 
 static int ceph_reconfigure_fc(struct fs_context *fc)
 {
+       int err;
        struct ceph_parse_opts_ctx *pctx = fc->fs_private;
        struct ceph_mount_options *fsopt = pctx->opts;
-       struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb);
+       struct super_block *sb = fc->root->d_sb;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+
+       err = ceph_apply_test_dummy_encryption(sb, fc, fsopt);
+       if (err)
+               return err;
 
        if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
                ceph_set_mount_opt(fsc, ASYNC_DIROPS);
        else
                ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
 
+       if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
+               ceph_set_mount_opt(fsc, SPARSEREAD);
+       else
+               ceph_clear_mount_opt(fsc, SPARSEREAD);
+
        if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
                kfree(fsc->mount_options->mon_addr);
                fsc->mount_options->mon_addr = fsopt->mon_addr;
@@ -1303,7 +1400,7 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
                pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
        }
 
-       sync_filesystem(fc->root->d_sb);
+       sync_filesystem(sb);
        return 0;
 }
 
@@ -1365,25 +1462,101 @@ nomem:
        return -ENOMEM;
 }
 
+/*
+ * Return true if it successfully increases the blocker counter, or
+ * false if the mdsc has reached the STOPPING_FLUSHING stage or later.
+ */
+static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       spin_lock(&mdsc->stopping_lock);
+       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
+               spin_unlock(&mdsc->stopping_lock);
+               return false;
+       }
+       atomic_inc(&mdsc->stopping_blockers);
+       spin_unlock(&mdsc->stopping_lock);
+       return true;
+}
+
+static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       spin_lock(&mdsc->stopping_lock);
+       if (!atomic_dec_return(&mdsc->stopping_blockers) &&
+           mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
+               complete_all(&mdsc->stopping_waiter);
+       spin_unlock(&mdsc->stopping_lock);
+}
+
+/* For metadata IO requests */
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+                                  struct ceph_mds_session *session)
+{
+       mutex_lock(&session->s_mutex);
+       inc_session_sequence(session);
+       mutex_unlock(&session->s_mutex);
+
+       return __inc_stopping_blocker(mdsc);
+}
+
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       __dec_stopping_blocker(mdsc);
+}
+
+/* For data IO requests */
+bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       return __inc_stopping_blocker(mdsc);
+}
+
+void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       __dec_stopping_blocker(mdsc);
+}
+
 static void ceph_kill_sb(struct super_block *s)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
+       bool wait;
 
        dout("kill_sb %p\n", s);
 
-       ceph_mdsc_pre_umount(fsc->mdsc);
+       ceph_mdsc_pre_umount(mdsc);
        flush_fs_workqueues(fsc);
 
        /*
         * Though the kill_anon_super() will finally trigger the
-        * sync_filesystem() anyway, we still need to do it here
-        * and then bump the stage of shutdown to stop the work
-        * queue as earlier as possible.
+        * sync_filesystem() anyway, we still need to do it here and
+        * then bump the stage of shutdown. This allows us to drop
+        * any further messages from the MDSs, which would only
+        * increase the inodes' i_count reference counters and make
+        * no sense any more.
+        *
+        * Without this, evicting the inodes may fail in
+        * kill_anon_super(), which will trigger a warning when
+        * destroying the fscrypt keyring and then possibly trigger
+        * a further crash in the ceph module when iput() tries to
+        * evict the inodes later.
         */
        sync_filesystem(s);
 
-       fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+       spin_lock(&mdsc->stopping_lock);
+       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
+       wait = !!atomic_read(&mdsc->stopping_blockers);
+       spin_unlock(&mdsc->stopping_lock);
+
+       if (wait && atomic_read(&mdsc->stopping_blockers)) {
+               long timeleft = wait_for_completion_killable_timeout(
+                                       &mdsc->stopping_waiter,
+                                       fsc->client->options->mount_timeout);
+               if (!timeleft) /* timed out */
+                       pr_warn("umount timed out, %ld\n", timeleft);
+               else if (timeleft < 0) /* killed */
+                       pr_warn("umount was killed, %ld\n", timeleft);
+       }
 
+       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
        kill_anon_super(s);
 
        fsc->client->extra_mon_dispatch = NULL;
index 3bfddf3..51c7f2b 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/hashtable.h>
 
 #include <linux/ceph/libceph.h>
+#include "crypto.h"
 
 /* large granularity for statfs utilization stats to facilitate
  * large volume sizes on 32-bit machines. */
@@ -42,6 +43,7 @@
 #define CEPH_MOUNT_OPT_NOCOPYFROM      (1<<14) /* don't use RADOS 'copy-from' op */
 #define CEPH_MOUNT_OPT_ASYNC_DIROPS    (1<<15) /* allow async directory ops */
 #define CEPH_MOUNT_OPT_NOPAGECACHE     (1<<16) /* bypass pagecache altogether */
+#define CEPH_MOUNT_OPT_SPARSEREAD      (1<<17) /* always do sparse reads */
 
 #define CEPH_MOUNT_OPT_DEFAULT                 \
        (CEPH_MOUNT_OPT_DCACHE |                \
@@ -98,6 +100,7 @@ struct ceph_mount_options {
        char *server_path;    /* default NULL (means "/") */
        char *fscache_uniq;   /* default NULL */
        char *mon_addr;
+       struct fscrypt_dummy_policy dummy_enc_policy;
 };
 
 /* mount state */
@@ -154,9 +157,11 @@ struct ceph_fs_client {
 #ifdef CONFIG_CEPH_FSCACHE
        struct fscache_volume *fscache;
 #endif
+#ifdef CONFIG_FS_ENCRYPTION
+       struct fscrypt_dummy_policy fsc_dummy_enc_policy;
+#endif
 };
 
-
 /*
  * File i/o capability.  This tracks shared state with the metadata
  * server that allows us to cache or writeback attributes or to read
@@ -419,6 +424,11 @@ struct ceph_inode_info {
        u32 i_truncate_seq;        /* last truncate to smaller size */
        u64 i_truncate_size;       /*  and the size we last truncated down to */
        int i_truncate_pending;    /*  still need to call vmtruncate */
+       /*
+        * In the non-fscrypt case this equals i_truncate_size;
+        * otherwise it equals fscrypt_file_size.
+        */
+       u64 i_truncate_pagecache_size;
 
        u64 i_max_size;            /* max file size authorized by mds */
        u64 i_reported_size; /* (max_)size reported to or requested of mds */
@@ -449,6 +459,13 @@ struct ceph_inode_info {
 
        struct work_struct i_work;
        unsigned long  i_work_mask;
+
+#ifdef CONFIG_FS_ENCRYPTION
+       u32 fscrypt_auth_len;
+       u32 fscrypt_file_len;
+       u8 *fscrypt_auth;
+       u8 *fscrypt_file;
+#endif
 };
 
 struct ceph_netfs_request_data {
@@ -998,6 +1015,7 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
 /* inode.c */
 struct ceph_mds_reply_info_in;
 struct ceph_mds_reply_dirfrag;
+struct ceph_acl_sec_ctx;
 
 extern const struct inode_operations ceph_file_iops;
 
@@ -1005,8 +1023,14 @@ extern struct inode *ceph_alloc_inode(struct super_block *sb);
 extern void ceph_evict_inode(struct inode *inode);
 extern void ceph_free_inode(struct inode *inode);
 
+struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
+                            umode_t *mode, struct ceph_acl_sec_ctx *as_ctx);
+void ceph_as_ctx_to_req(struct ceph_mds_request *req,
+                       struct ceph_acl_sec_ctx *as_ctx);
+
 extern struct inode *ceph_get_inode(struct super_block *sb,
-                                   struct ceph_vino vino);
+                                   struct ceph_vino vino,
+                                   struct inode *newino);
 extern struct inode *ceph_get_snapdir(struct inode *parent);
 extern int ceph_fill_file_size(struct inode *inode, int issued,
                               u32 truncate_seq, u64 truncate_size, u64 size);
@@ -1065,7 +1089,13 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
 }
 extern int ceph_permission(struct mnt_idmap *idmap,
                           struct inode *inode, int mask);
-extern int __ceph_setattr(struct inode *inode, struct iattr *attr);
+
+struct ceph_iattr {
+       struct ceph_fscrypt_auth        *fscrypt_auth;
+};
+
+extern int __ceph_setattr(struct inode *inode, struct iattr *attr,
+                         struct ceph_iattr *cia);
 extern int ceph_setattr(struct mnt_idmap *idmap,
                        struct dentry *dentry, struct iattr *attr);
 extern int ceph_getattr(struct mnt_idmap *idmap,
@@ -1100,6 +1130,9 @@ struct ceph_acl_sec_ctx {
        void *sec_ctx;
        u32 sec_ctxlen;
 #endif
+#ifdef CONFIG_FS_ENCRYPTION
+       struct ceph_fscrypt_auth *fscrypt_auth;
+#endif
        struct ceph_pagelist *pagelist;
 };
 
@@ -1237,6 +1270,8 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
                                      struct inode *dir,
                                      int mds, int drop, int unless);
 
+extern int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi,
+                          int need, int want, loff_t endoff, int *got);
 extern int ceph_get_caps(struct file *filp, int need, int want,
                         loff_t endoff, int *got);
 extern int ceph_try_get_caps(struct inode *inode,
@@ -1272,6 +1307,9 @@ extern int ceph_renew_caps(struct inode *inode, int fmode);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                            struct file *file, unsigned flags, umode_t mode);
+extern ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+                               struct iov_iter *to, int *retry_op,
+                               u64 *last_objver);
 extern int ceph_release(struct inode *inode, struct file *filp);
 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
                                  char *data, size_t len);
@@ -1375,4 +1413,9 @@ extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
                                     struct kstatfs *buf);
 extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
 
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+                              struct ceph_mds_session *session);
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc);
+bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc);
+void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc);
 #endif /* _FS_CEPH_SUPER_H */
index 1cbd84c..0deae4a 100644 (file)
@@ -352,6 +352,24 @@ static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci,
        return ret;
 }
 
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static bool ceph_vxattrcb_fscrypt_auth_exists(struct ceph_inode_info *ci)
+{
+       return ci->fscrypt_auth_len;
+}
+
+static ssize_t ceph_vxattrcb_fscrypt_auth(struct ceph_inode_info *ci,
+                                         char *val, size_t size)
+{
+       if (size) {
+               if (size < ci->fscrypt_auth_len)
+                       return -ERANGE;
+               memcpy(val, ci->fscrypt_auth, ci->fscrypt_auth_len);
+       }
+       return ci->fscrypt_auth_len;
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
 #define CEPH_XATTR_NAME(_type, _name)  XATTR_CEPH_PREFIX #_type "." #_name
 #define CEPH_XATTR_NAME2(_type, _name, _name2) \
        XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
@@ -500,6 +518,15 @@ static struct ceph_vxattr ceph_common_vxattrs[] = {
                .exists_cb = NULL,
                .flags = VXATTR_FLAG_READONLY,
        },
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       {
+               .name = "ceph.fscrypt.auth",
+               .name_size = sizeof("ceph.fscrypt.auth"),
+               .getxattr_cb = ceph_vxattrcb_fscrypt_auth,
+               .exists_cb = ceph_vxattrcb_fscrypt_auth_exists,
+               .flags = VXATTR_FLAG_READONLY,
+       },
+#endif /* CONFIG_FS_ENCRYPTION */
        { .name = NULL, 0 }     /* Required table terminator */
 };
 
@@ -1408,6 +1435,9 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx)
 #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
        security_release_secctx(as_ctx->sec_ctx, as_ctx->sec_ctxlen);
 #endif
+#ifdef CONFIG_FS_ENCRYPTION
+       kfree(as_ctx->fscrypt_auth);
+#endif
        if (as_ctx->pagelist)
                ceph_pagelist_release(as_ctx->pagelist);
 }
index 881524b..d707e69 100644 (file)
@@ -92,7 +92,7 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time)
 /*
  * Calculate the time in jiffies until a dentry/attributes are valid
  */
-static u64 time_to_jiffies(u64 sec, u32 nsec)
+u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
 {
        if (sec || nsec) {
                struct timespec64 ts = {
@@ -112,17 +112,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec)
 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
 {
        fuse_dentry_settime(entry,
-               time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
-}
-
-static u64 attr_timeout(struct fuse_attr_out *o)
-{
-       return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
-}
-
-u64 entry_attr_timeout(struct fuse_entry_out *o)
-{
-       return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+               fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
 }
 
 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
@@ -265,8 +255,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                        goto invalid;
 
                forget_all_cached_acls(inode);
-               fuse_change_attributes(inode, &outarg.attr,
-                                      entry_attr_timeout(&outarg),
+               fuse_change_attributes(inode, &outarg.attr, NULL,
+                                      ATTR_TIMEOUT(&outarg),
                                       attr_version);
                fuse_change_entry_timeout(entry, &outarg);
        } else if (inode) {
@@ -360,10 +350,14 @@ int fuse_valid_type(int m)
                S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
 }
 
+static bool fuse_valid_size(u64 size)
+{
+       return size <= LLONG_MAX;
+}
+
 bool fuse_invalid_attr(struct fuse_attr *attr)
 {
-       return !fuse_valid_type(attr->mode) ||
-               attr->size > LLONG_MAX;
+       return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
 }
 
 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
@@ -399,7 +393,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
                goto out_put_forget;
 
        *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
-                          &outarg->attr, entry_attr_timeout(outarg),
+                          &outarg->attr, ATTR_TIMEOUT(outarg),
                           attr_version);
        err = -ENOMEM;
        if (!*inode) {
@@ -686,7 +680,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        ff->nodeid = outentry.nodeid;
        ff->open_flags = outopen.open_flags;
        inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
-                         &outentry.attr, entry_attr_timeout(&outentry), 0);
+                         &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
        if (!inode) {
                flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
                fuse_sync_release(NULL, ff, flags);
@@ -755,7 +749,8 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
        if (err == -ENOSYS) {
                fc->no_create = 1;
                goto mknod;
-       }
+       } else if (err == -EEXIST)
+               fuse_invalidate_entry(entry);
 out_dput:
        dput(res);
        return err;
@@ -813,7 +808,7 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
                goto out_put_forget_req;
 
        inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
-                         &outarg.attr, entry_attr_timeout(&outarg), 0);
+                         &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
        if (!inode) {
                fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
                return -ENOMEM;
@@ -835,6 +830,8 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
        return 0;
 
  out_put_forget_req:
+       if (err == -EEXIST)
+               fuse_invalidate_entry(entry);
        kfree(forget);
        return err;
 }
@@ -986,7 +983,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
        if (!err) {
                fuse_dir_changed(dir);
                fuse_entry_unlinked(entry);
-       } else if (err == -EINTR)
+       } else if (err == -EINTR || err == -ENOENT)
                fuse_invalidate_entry(entry);
        return err;
 }
@@ -1009,7 +1006,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
        if (!err) {
                fuse_dir_changed(dir);
                fuse_entry_unlinked(entry);
-       } else if (err == -EINTR)
+       } else if (err == -EINTR || err == -ENOENT)
                fuse_invalidate_entry(entry);
        return err;
 }
@@ -1050,7 +1047,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
                /* newent will end up negative */
                if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
                        fuse_entry_unlinked(newent);
-       } else if (err == -EINTR) {
+       } else if (err == -EINTR || err == -ENOENT) {
                /* If request was interrupted, DEITY only knows if the
                   rename actually took place.  If the invalidation
                   fails (e.g. some process has CWD under the renamed
@@ -1153,6 +1150,87 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
        stat->blksize = 1 << blkbits;
 }
 
+static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
+{
+       memset(attr, 0, sizeof(*attr));
+       attr->ino = sx->ino;
+       attr->size = sx->size;
+       attr->blocks = sx->blocks;
+       attr->atime = sx->atime.tv_sec;
+       attr->mtime = sx->mtime.tv_sec;
+       attr->ctime = sx->ctime.tv_sec;
+       attr->atimensec = sx->atime.tv_nsec;
+       attr->mtimensec = sx->mtime.tv_nsec;
+       attr->ctimensec = sx->ctime.tv_nsec;
+       attr->mode = sx->mode;
+       attr->nlink = sx->nlink;
+       attr->uid = sx->uid;
+       attr->gid = sx->gid;
+       attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
+       attr->blksize = sx->blksize;
+}
+
+static int fuse_do_statx(struct inode *inode, struct file *file,
+                        struct kstat *stat)
+{
+       int err;
+       struct fuse_attr attr;
+       struct fuse_statx *sx;
+       struct fuse_statx_in inarg;
+       struct fuse_statx_out outarg;
+       struct fuse_mount *fm = get_fuse_mount(inode);
+       u64 attr_version = fuse_get_attr_version(fm->fc);
+       FUSE_ARGS(args);
+
+       memset(&inarg, 0, sizeof(inarg));
+       memset(&outarg, 0, sizeof(outarg));
+       /* Directories have separate file-handle space */
+       if (file && S_ISREG(inode->i_mode)) {
+               struct fuse_file *ff = file->private_data;
+
+               inarg.getattr_flags |= FUSE_GETATTR_FH;
+               inarg.fh = ff->fh;
+       }
+       /* For now leave sync hints as the default, request all stats. */
+       inarg.sx_flags = 0;
+       inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
+       args.opcode = FUSE_STATX;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
+       err = fuse_simple_request(fm, &args);
+       if (err)
+               return err;
+
+       sx = &outarg.stat;
+       if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
+           ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
+                                        inode_wrong_type(inode, sx->mode)))) {
+               make_bad_inode(inode);
+               return -EIO;
+       }
+
+       fuse_statx_to_attr(&outarg.stat, &attr);
+       if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
+               fuse_change_attributes(inode, &attr, &outarg.stat,
+                                      ATTR_TIMEOUT(&outarg), attr_version);
+       }
+
+       if (stat) {
+               stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
+               stat->btime.tv_sec = sx->btime.tv_sec;
+               stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
+               fuse_fillattr(inode, &attr, stat);
+               stat->result_mask |= STATX_TYPE;
+       }
+
+       return 0;
+}
+
 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
                           struct file *file)
 {
@@ -1189,8 +1267,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
                        fuse_make_bad(inode);
                        err = -EIO;
                } else {
-                       fuse_change_attributes(inode, &outarg.attr,
-                                              attr_timeout(&outarg),
+                       fuse_change_attributes(inode, &outarg.attr, NULL,
+                                              ATTR_TIMEOUT(&outarg),
                                               attr_version);
                        if (stat)
                                fuse_fillattr(inode, &outarg.attr, stat);
@@ -1204,12 +1282,22 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
                                unsigned int flags)
 {
        struct fuse_inode *fi = get_fuse_inode(inode);
+       struct fuse_conn *fc = get_fuse_conn(inode);
        int err = 0;
        bool sync;
        u32 inval_mask = READ_ONCE(fi->inval_mask);
        u32 cache_mask = fuse_get_cache_mask(inode);
 
-       if (flags & AT_STATX_FORCE_SYNC)
+
+       /* FUSE only supports basic stats and possibly btime */
+       request_mask &= STATX_BASIC_STATS | STATX_BTIME;
+retry:
+       if (fc->no_statx)
+               request_mask &= STATX_BASIC_STATS;
+
+       if (!request_mask)
+               sync = false;
+       else if (flags & AT_STATX_FORCE_SYNC)
                sync = true;
        else if (flags & AT_STATX_DONT_SYNC)
                sync = false;
@@ -1220,11 +1308,24 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
 
        if (sync) {
                forget_all_cached_acls(inode);
-               err = fuse_do_getattr(inode, stat, file);
+               /* Try statx if BTIME is requested */
+               if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
+                       err = fuse_do_statx(inode, file, stat);
+                       if (err == -ENOSYS) {
+                               fc->no_statx = 1;
+                               goto retry;
+                       }
+               } else {
+                       err = fuse_do_getattr(inode, stat, file);
+               }
        } else if (stat) {
                generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
                stat->mode = fi->orig_i_mode;
                stat->ino = fi->orig_ino;
+               if (test_bit(FUSE_I_BTIME, &fi->state)) {
+                       stat->btime = fi->i_btime;
+                       stat->result_mask |= STATX_BTIME;
+               }
        }
 
        return err;
@@ -1861,8 +1962,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
                /* FIXME: clear I_DIRTY_SYNC? */
        }
 
-       fuse_change_attributes_common(inode, &outarg.attr,
-                                     attr_timeout(&outarg),
+       fuse_change_attributes_common(inode, &outarg.attr, NULL,
+                                     ATTR_TIMEOUT(&outarg),
                                      fuse_get_cache_mask(inode));
        oldsize = inode->i_size;
        /* see the comment in fuse_change_attributes() */
index bc41152..1cdb632 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/uio.h>
 #include <linux/fs.h>
 #include <linux/filelock.h>
-#include <linux/file.h>
 
 static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
                          unsigned int open_flags, int opcode,
@@ -479,36 +478,48 @@ static void fuse_sync_writes(struct inode *inode)
        fuse_release_nowrite(inode);
 }
 
-struct fuse_flush_args {
-       struct fuse_args args;
-       struct fuse_flush_in inarg;
-       struct work_struct work;
-       struct file *file;
-};
-
-static int fuse_do_flush(struct fuse_flush_args *fa)
+static int fuse_flush(struct file *file, fl_owner_t id)
 {
-       int err;
-       struct inode *inode = file_inode(fa->file);
+       struct inode *inode = file_inode(file);
        struct fuse_mount *fm = get_fuse_mount(inode);
+       struct fuse_file *ff = file->private_data;
+       struct fuse_flush_in inarg;
+       FUSE_ARGS(args);
+       int err;
+
+       if (fuse_is_bad(inode))
+               return -EIO;
+
+       if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache)
+               return 0;
 
        err = write_inode_now(inode, 1);
        if (err)
-               goto out;
+               return err;
 
        inode_lock(inode);
        fuse_sync_writes(inode);
        inode_unlock(inode);
 
-       err = filemap_check_errors(fa->file->f_mapping);
+       err = filemap_check_errors(file->f_mapping);
        if (err)
-               goto out;
+               return err;
 
        err = 0;
        if (fm->fc->no_flush)
                goto inval_attr_out;
 
-       err = fuse_simple_request(fm, &fa->args);
+       memset(&inarg, 0, sizeof(inarg));
+       inarg.fh = ff->fh;
+       inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
+       args.opcode = FUSE_FLUSH;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.force = true;
+
+       err = fuse_simple_request(fm, &args);
        if (err == -ENOSYS) {
                fm->fc->no_flush = 1;
                err = 0;
@@ -521,57 +532,9 @@ inval_attr_out:
         */
        if (!err && fm->fc->writeback_cache)
                fuse_invalidate_attr_mask(inode, STATX_BLOCKS);
-
-out:
-       fput(fa->file);
-       kfree(fa);
        return err;
 }
 
-static void fuse_flush_async(struct work_struct *work)
-{
-       struct fuse_flush_args *fa = container_of(work, typeof(*fa), work);
-
-       fuse_do_flush(fa);
-}
-
-static int fuse_flush(struct file *file, fl_owner_t id)
-{
-       struct fuse_flush_args *fa;
-       struct inode *inode = file_inode(file);
-       struct fuse_mount *fm = get_fuse_mount(inode);
-       struct fuse_file *ff = file->private_data;
-
-       if (fuse_is_bad(inode))
-               return -EIO;
-
-       if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache)
-               return 0;
-
-       fa = kzalloc(sizeof(*fa), GFP_KERNEL);
-       if (!fa)
-               return -ENOMEM;
-
-       fa->inarg.fh = ff->fh;
-       fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
-       fa->args.opcode = FUSE_FLUSH;
-       fa->args.nodeid = get_node_id(inode);
-       fa->args.in_numargs = 1;
-       fa->args.in_args[0].size = sizeof(fa->inarg);
-       fa->args.in_args[0].value = &fa->inarg;
-       fa->args.force = true;
-       fa->file = get_file(file);
-
-       /* Don't wait if the task is exiting */
-       if (current->flags & PF_EXITING) {
-               INIT_WORK(&fa->work, fuse_flush_async);
-               schedule_work(&fa->work);
-               return 0;
-       }
-
-       return fuse_do_flush(fa);
-}
-
 int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
                      int datasync, int opcode)
 {
@@ -1465,7 +1428,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
        int write = flags & FUSE_DIO_WRITE;
        int cuse = flags & FUSE_DIO_CUSE;
        struct file *file = io->iocb->ki_filp;
-       struct inode *inode = file->f_mapping->host;
+       struct address_space *mapping = file->f_mapping;
+       struct inode *inode = mapping->host;
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fm->fc;
        size_t nmax = write ? fc->max_write : fc->max_read;
@@ -1477,12 +1441,20 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
        int err = 0;
        struct fuse_io_args *ia;
        unsigned int max_pages;
+       bool fopen_direct_io = ff->open_flags & FOPEN_DIRECT_IO;
 
        max_pages = iov_iter_npages(iter, fc->max_pages);
        ia = fuse_io_alloc(io, max_pages);
        if (!ia)
                return -ENOMEM;
 
+       if (fopen_direct_io && fc->direct_io_relax) {
+               res = filemap_write_and_wait_range(mapping, pos, pos + count - 1);
+               if (res) {
+                       fuse_io_free(ia);
+                       return res;
+               }
+       }
        if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
                if (!write)
                        inode_lock(inode);
@@ -1491,6 +1463,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
                        inode_unlock(inode);
        }
 
+       if (fopen_direct_io && write) {
+               res = invalidate_inode_pages2_range(mapping, idx_from, idx_to);
+               if (res) {
+                       fuse_io_free(ia);
+                       return res;
+               }
+       }
+
        io->should_dirty = !write && user_backed_iter(iter);
        while (count) {
                ssize_t nres;
@@ -2478,14 +2458,17 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct fuse_file *ff = file->private_data;
+       struct fuse_conn *fc = ff->fm->fc;
 
        /* DAX mmap is superior to direct_io mmap */
        if (FUSE_IS_DAX(file_inode(file)))
                return fuse_dax_mmap(file, vma);
 
        if (ff->open_flags & FOPEN_DIRECT_IO) {
-               /* Can't provide the coherency needed for MAP_SHARED */
-               if (vma->vm_flags & VM_MAYSHARE)
+               /* Can't provide the coherency needed for MAP_SHARED
+                * if FUSE_DIRECT_IO_RELAX isn't set.
+                */
+               if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_relax)
                        return -ENODEV;
 
                invalidate_inode_pages2(file->f_mapping);
index 9b7fc7d..bf0b85d 100644 (file)
@@ -88,6 +88,9 @@ struct fuse_inode {
            preserve the original mode */
        umode_t orig_i_mode;
 
+       /* Cache birthtime */
+       struct timespec64 i_btime;
+
        /** 64 bit inode number */
        u64 orig_ino;
 
@@ -167,6 +170,8 @@ enum {
        FUSE_I_SIZE_UNSTABLE,
        /* Bad inode */
        FUSE_I_BAD,
+       /* Has btime */
+       FUSE_I_BTIME,
 };
 
 struct fuse_conn;
@@ -792,6 +797,12 @@ struct fuse_conn {
        /* Is tmpfile not implemented by fs? */
        unsigned int no_tmpfile:1;
 
+       /* relax restrictions in FOPEN_DIRECT_IO mode */
+       unsigned int direct_io_relax:1;
+
+       /* Is statx not implemented by fs? */
+       unsigned int no_statx:1;
+
        /** The number of requests waiting for completion */
        atomic_t num_waiting;
 
@@ -1058,9 +1069,11 @@ void fuse_init_symlink(struct inode *inode);
  * Change attributes of an inode
  */
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+                           struct fuse_statx *sx,
                            u64 attr_valid, u64 attr_version);
 
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+                                  struct fuse_statx *sx,
                                   u64 attr_valid, u32 cache_mask);
 
 u32 fuse_get_cache_mask(struct inode *inode);
@@ -1111,7 +1124,10 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
 
 void fuse_invalidate_atime(struct inode *inode);
 
-u64 entry_attr_timeout(struct fuse_entry_out *o);
+u64 fuse_time_to_jiffies(u64 sec, u32 nsec);
+#define ATTR_TIMEOUT(o) \
+       fuse_time_to_jiffies((o)->attr_valid, (o)->attr_valid_nsec)
+
 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o);
 
 /**
index 549358f..2e4eb7c 100644 (file)
@@ -77,7 +77,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
                return NULL;
 
        fi->i_time = 0;
-       fi->inval_mask = 0;
+       fi->inval_mask = ~0;
        fi->nodeid = 0;
        fi->nlookup = 0;
        fi->attr_version = 0;
@@ -163,6 +163,7 @@ static ino_t fuse_squash_ino(u64 ino64)
 }
 
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+                                  struct fuse_statx *sx,
                                   u64 attr_valid, u32 cache_mask)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -172,7 +173,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 
        fi->attr_version = atomic64_inc_return(&fc->attr_version);
        fi->i_time = attr_valid;
-       WRITE_ONCE(fi->inval_mask, 0);
+       /* Clear basic stats from invalid mask */
+       set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
 
        inode->i_ino     = fuse_squash_ino(attr->ino);
        inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
@@ -196,6 +198,25 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
        if (!(cache_mask & STATX_CTIME)) {
                inode_set_ctime(inode, attr->ctime, attr->ctimensec);
        }
+       if (sx) {
+               /* Sanitize nsecs */
+               sx->btime.tv_nsec =
+                       min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
+
+               /*
+                * Btime has been queried, cache is valid (whether or not btime
+                * is available or not) so clear STATX_BTIME from inval_mask.
+                *
+                * Availability of the btime attribute is indicated in
+                * FUSE_I_BTIME
+                */
+               set_mask_bits(&fi->inval_mask, STATX_BTIME, 0);
+               if (sx->mask & STATX_BTIME) {
+                       set_bit(FUSE_I_BTIME, &fi->state);
+                       fi->i_btime.tv_sec = sx->btime.tv_sec;
+                       fi->i_btime.tv_nsec = sx->btime.tv_nsec;
+               }
+       }
 
        if (attr->blksize != 0)
                inode->i_blkbits = ilog2(attr->blksize);
@@ -235,6 +256,7 @@ u32 fuse_get_cache_mask(struct inode *inode)
 }
 
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+                           struct fuse_statx *sx,
                            u64 attr_valid, u64 attr_version)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -269,7 +291,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
        }
 
        old_mtime = inode->i_mtime;
-       fuse_change_attributes_common(inode, attr, attr_valid, cache_mask);
+       fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);
 
        oldsize = inode->i_size;
        /*
@@ -406,7 +428,7 @@ done:
        spin_lock(&fi->lock);
        fi->nlookup++;
        spin_unlock(&fi->lock);
-       fuse_change_attributes(inode, attr, attr_valid, attr_version);
+       fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
 
        return inode;
 }
@@ -1210,6 +1232,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
                                fc->init_security = 1;
                        if (flags & FUSE_CREATE_SUPP_GROUP)
                                fc->create_supp_group = 1;
+                       if (flags & FUSE_DIRECT_IO_RELAX)
+                               fc->direct_io_relax = 1;
                } else {
                        ra_pages = fc->max_read / PAGE_SIZE;
                        fc->no_lock = 1;
@@ -1256,7 +1280,7 @@ void fuse_send_init(struct fuse_mount *fm)
                FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
                FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
                FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
-               FUSE_HAS_EXPIRE_ONLY;
+               FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_RELAX;
 #ifdef CONFIG_FUSE_DAX
        if (fm->fc->dax)
                flags |= FUSE_MAP_ALIGNMENT;
index dc60347..9e6d587 100644 (file)
@@ -223,8 +223,8 @@ retry:
                spin_unlock(&fi->lock);
 
                forget_all_cached_acls(inode);
-               fuse_change_attributes(inode, &o->attr,
-                                      entry_attr_timeout(o),
+               fuse_change_attributes(inode, &o->attr, NULL,
+                                      ATTR_TIMEOUT(o),
                                       attr_version);
                /*
                 * The other branch comes via fuse_iget()
@@ -232,7 +232,7 @@ retry:
                 */
        } else {
                inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
-                                 &o->attr, entry_attr_timeout(o),
+                                 &o->attr, ATTR_TIMEOUT(o),
                                  attr_version);
                if (!inode)
                        inode = ERR_PTR(-ENOMEM);
@@ -243,8 +243,16 @@ retry:
                        dput(dentry);
                        dentry = alias;
                }
-               if (IS_ERR(dentry))
+               if (IS_ERR(dentry)) {
+                       if (!IS_ERR(inode)) {
+                               struct fuse_inode *fi = get_fuse_inode(inode);
+
+                               spin_lock(&fi->lock);
+                               fi->nlookup--;
+                               spin_unlock(&fi->lock);
+                       }
                        return PTR_ERR(dentry);
+               }
        }
        if (fc->readdirplus_auto)
                set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
index 9c4b26a..c26d483 100644 (file)
@@ -183,13 +183,13 @@ static int gfs2_writepages(struct address_space *mapping,
        int ret;
 
        /*
-        * Even if we didn't write any pages here, we might still be holding
+        * Even if we didn't write enough pages here, we might still be holding
         * dirty pages in the ail. We forcibly flush the ail because we don't
         * want balance_dirty_pages() to loop indefinitely trying to write out
         * pages held in the ail that it can't find.
         */
        ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
-       if (ret == 0)
+       if (ret == 0 && wbc->nr_to_write > 0)
                set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
        return ret;
 }
@@ -272,8 +272,7 @@ continue_unlock:
                                 * not be suitable for data integrity
                                 * writeout).
                                 */
-                               *done_index = folio->index +
-                                       folio_nr_pages(folio);
+                               *done_index = folio_next_index(folio);
                                ret = 1;
                                break;
                        }
index f62366b..ef7017f 100644 (file)
@@ -161,7 +161,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip)
        int error;
 
        down_write(&ip->i_rw_mutex);
-       page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
+       page = grab_cache_page(inode->i_mapping, 0);
        error = -ENOMEM;
        if (!page)
                goto out;
index 1438e74..9cbf8d9 100644 (file)
@@ -176,7 +176,7 @@ void gfs2_glock_free(struct gfs2_glock *gl)
        wake_up_glock(gl);
        call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
        if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-               wake_up(&sdp->sd_glock_wait);
+               wake_up(&sdp->sd_kill_wait);
 }
 
 /**
@@ -468,10 +468,10 @@ done:
  * do_promote - promote as many requests as possible on the current queue
  * @gl: The glock
  * 
- * Returns: 1 if there is a blocked holder at the head of the list
+ * Returns true on success (i.e., progress was made or there are no waiters).
  */
 
-static int do_promote(struct gfs2_glock *gl)
+static bool do_promote(struct gfs2_glock *gl)
 {
        struct gfs2_holder *gh, *current_gh;
 
@@ -484,10 +484,10 @@ static int do_promote(struct gfs2_glock *gl)
                         * If we get here, it means we may not grant this
                         * holder for some reason. If this holder is at the
                         * head of the list, it means we have a blocked holder
-                        * at the head, so return 1.
+                        * at the head, so return false.
                         */
                        if (list_is_first(&gh->gh_list, &gl->gl_holders))
-                               return 1;
+                               return false;
                        do_error(gl, 0);
                        break;
                }
@@ -497,7 +497,7 @@ static int do_promote(struct gfs2_glock *gl)
                if (!current_gh)
                        current_gh = gh;
        }
-       return 0;
+       return true;
 }
 
 /**
@@ -591,10 +591,11 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
                if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
                        /* move to back of queue and try next entry */
                        if (ret & LM_OUT_CANCELED) {
-                               if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
-                                       list_move_tail(&gh->gh_list, &gl->gl_holders);
+                               list_move_tail(&gh->gh_list, &gl->gl_holders);
                                gh = find_first_waiter(gl);
                                gl->gl_target = gh->gh_state;
+                               if (do_promote(gl))
+                                       goto out;
                                goto retry;
                        }
                        /* Some error or failed "try lock" - report it */
@@ -679,8 +680,7 @@ __acquires(&gl->gl_lockref.lock)
            gh && !(gh->gh_flags & LM_FLAG_NOEXP))
                goto skip_inval;
 
-       lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
-                     LM_FLAG_PRIORITY);
+       lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP);
        GLOCK_BUG_ON(gl, gl->gl_state == target);
        GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
        if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
@@ -834,7 +834,7 @@ __acquires(&gl->gl_lockref.lock)
        } else {
                if (test_bit(GLF_DEMOTE, &gl->gl_flags))
                        gfs2_demote_wake(gl);
-               if (do_promote(gl) == 0)
+               if (do_promote(gl))
                        goto out_unlock;
                gh = find_first_waiter(gl);
                gl->gl_target = gh->gh_state;
@@ -1022,7 +1022,7 @@ static void delete_work_func(struct work_struct *work)
                 * step entirely.
                 */
                if (gfs2_try_evict(gl)) {
-                       if (test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+                       if (test_bit(SDF_KILL, &sdp->sd_flags))
                                goto out;
                        if (gfs2_queue_verify_evict(gl))
                                return;
@@ -1035,7 +1035,7 @@ static void delete_work_func(struct work_struct *work)
                                            GFS2_BLKST_UNLINKED);
                if (IS_ERR(inode)) {
                        if (PTR_ERR(inode) == -EAGAIN &&
-                           !test_bit(SDF_DEACTIVATING, &sdp->sd_flags) &&
+                           !test_bit(SDF_KILL, &sdp->sd_flags) &&
                            gfs2_queue_verify_evict(gl))
                                return;
                } else {
@@ -1231,7 +1231,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 out_free:
        gfs2_glock_dealloc(&gl->gl_rcu);
        if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-               wake_up(&sdp->sd_glock_wait);
+               wake_up(&sdp->sd_kill_wait);
 
 out:
        return ret;
@@ -1515,27 +1515,20 @@ fail:
                }
                if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
                        continue;
-               if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
-                       insert_pt = &gh2->gh_list;
        }
        trace_gfs2_glock_queue(gh, 1);
        gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
        gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
        if (likely(insert_pt == NULL)) {
                list_add_tail(&gh->gh_list, &gl->gl_holders);
-               if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
-                       goto do_cancel;
                return;
        }
        list_add_tail(&gh->gh_list, insert_pt);
-do_cancel:
        gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
-       if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
-               spin_unlock(&gl->gl_lockref.lock);
-               if (sdp->sd_lockstruct.ls_ops->lm_cancel)
-                       sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
-               spin_lock(&gl->gl_lockref.lock);
-       }
+       spin_unlock(&gl->gl_lockref.lock);
+       if (sdp->sd_lockstruct.ls_ops->lm_cancel)
+               sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
+       spin_lock(&gl->gl_lockref.lock);
        return;
 
 trap_recursive:
@@ -2195,7 +2188,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
        flush_workqueue(glock_workqueue);
        glock_hash_walk(clear_glock, sdp);
        flush_workqueue(glock_workqueue);
-       wait_event_timeout(sdp->sd_glock_wait,
+       wait_event_timeout(sdp->sd_kill_wait,
                           atomic_read(&sdp->sd_glock_disposal) == 0,
                           HZ * 600);
        glock_hash_walk(dump_glock_func, sdp);
@@ -2227,8 +2220,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
                *p++ = 'e';
        if (flags & LM_FLAG_ANY)
                *p++ = 'A';
-       if (flags & LM_FLAG_PRIORITY)
-               *p++ = 'p';
        if (flags & LM_FLAG_NODE_SCOPE)
                *p++ = 'n';
        if (flags & GL_ASYNC)
index 1f1ba92..c8685ca 100644 (file)
@@ -68,14 +68,6 @@ enum {
  * also be granted in SHARED.  The preferred state is whichever is compatible
  * with other granted locks, or the specified state if no other locks exist.
  *
- * LM_FLAG_PRIORITY
- * Override fairness considerations.  Suppose a lock is held in a shared state
- * and there is a pending request for the deferred state.  A shared lock
- * request with the priority flag would be allowed to bypass the deferred
- * request and directly join the other shared lock.  A shared lock request
- * without the priority flag might be forced to wait until the deferred
- * requested had acquired and released the lock.
- *
  * LM_FLAG_NODE_SCOPE
  * This holder agrees to share the lock within this node. In other words,
  * the glock is held in EX mode according to DLM, but local holders on the
@@ -86,7 +78,6 @@ enum {
 #define LM_FLAG_TRY_1CB                0x0002
 #define LM_FLAG_NOEXP          0x0004
 #define LM_FLAG_ANY            0x0008
-#define LM_FLAG_PRIORITY       0x0010
 #define LM_FLAG_NODE_SCOPE     0x0020
 #define GL_ASYNC               0x0040
 #define GL_EXACT               0x0080
index aecdac3..d26759a 100644 (file)
@@ -637,7 +637,7 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 
        if (!remote || sb_rdonly(sdp->sd_vfs) ||
-           test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+           test_bit(SDF_KILL, &sdp->sd_flags))
                return;
 
        if (gl->gl_demote_state == LM_ST_UNLOCKED &&
index 04f2d78..a8c95c5 100644 (file)
@@ -452,7 +452,7 @@ struct gfs2_quota_data {
        s64 qd_change_sync;
 
        unsigned int qd_slot;
-       unsigned int qd_slot_count;
+       unsigned int qd_slot_ref;
 
        struct buffer_head *qd_bh;
        struct gfs2_quota_change *qd_bh_qc;
@@ -537,6 +537,7 @@ struct gfs2_statfs_change_host {
 #define GFS2_QUOTA_OFF         0
 #define GFS2_QUOTA_ACCOUNT     1
 #define GFS2_QUOTA_ON          2
+#define GFS2_QUOTA_QUIET       3 /* on but not complaining */
 
 #define GFS2_DATA_DEFAULT      GFS2_DATA_ORDERED
 #define GFS2_DATA_WRITEBACK    1
@@ -606,7 +607,7 @@ enum {
        SDF_REMOTE_WITHDRAW     = 13, /* Performing remote recovery */
        SDF_WITHDRAW_RECOVERY   = 14, /* Wait for journal recovery when we are
                                         withdrawing */
-       SDF_DEACTIVATING        = 15,
+       SDF_KILL                = 15,
        SDF_EVICTING            = 16,
        SDF_FROZEN              = 17,
 };
@@ -716,7 +717,7 @@ struct gfs2_sbd {
        struct gfs2_glock *sd_rename_gl;
        struct gfs2_glock *sd_freeze_gl;
        struct work_struct sd_freeze_work;
-       wait_queue_head_t sd_glock_wait;
+       wait_queue_head_t sd_kill_wait;
        wait_queue_head_t sd_async_glock_wait;
        atomic_t sd_glock_disposal;
        struct completion sd_locking_init;
index a21ac41..0eac045 100644 (file)
@@ -276,10 +276,16 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
         * gfs2_lookup_simple callers expect ENOENT
         * and do not check for NULL.
         */
-       if (inode == NULL)
-               return ERR_PTR(-ENOENT);
-       else
-               return inode;
+       if (IS_ERR_OR_NULL(inode))
+               return inode ? inode : ERR_PTR(-ENOENT);
+
+       /*
+        * Must not call back into the filesystem when allocating
+        * pages in the metadata inode's address space.
+        */
+       mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+
+       return inode;
 }
 
 
index 5491129..59ab18c 100644 (file)
@@ -222,11 +222,6 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
                lkf |= DLM_LKF_NOQUEUEBAST;
        }
 
-       if (gfs_flags & LM_FLAG_PRIORITY) {
-               lkf |= DLM_LKF_NOORDER;
-               lkf |= DLM_LKF_HEADQUE;
-       }
-
        if (gfs_flags & LM_FLAG_ANY) {
                if (req == DLM_LOCK_PR)
                        lkf |= DLM_LKF_ALTCW;
index aa56879..e5271ae 100644 (file)
@@ -1227,6 +1227,21 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
        gfs2_log_unlock(sdp);
 }
 
+static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
+{
+       return atomic_read(&sdp->sd_log_pinned) +
+              atomic_read(&sdp->sd_log_blks_needed) >=
+              atomic_read(&sdp->sd_log_thresh1);
+}
+
+static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
+{
+       return sdp->sd_jdesc->jd_blocks -
+              atomic_read(&sdp->sd_log_blks_free) +
+              atomic_read(&sdp->sd_log_blks_needed) >=
+              atomic_read(&sdp->sd_log_thresh2);
+}
+
 /**
  * gfs2_log_commit - Commit a transaction to the log
  * @sdp: the filesystem
@@ -1246,9 +1261,7 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 {
        log_refund(sdp, tr);
 
-       if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
-           ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
-           atomic_read(&sdp->sd_log_thresh2)))
+       if (gfs2_ail_flush_reqd(sdp) || gfs2_jrnl_flush_reqd(sdp))
                wake_up(&sdp->sd_logd_waitq);
 }
 
@@ -1271,24 +1284,6 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
        gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
 }
 
-static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
-{
-       return (atomic_read(&sdp->sd_log_pinned) +
-               atomic_read(&sdp->sd_log_blks_needed) >=
-               atomic_read(&sdp->sd_log_thresh1));
-}
-
-static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
-{
-       unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
-
-       if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
-               return 1;
-
-       return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
-               atomic_read(&sdp->sd_log_thresh2);
-}
-
 /**
  * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
  * @data: Pointer to GFS2 superblock
@@ -1301,14 +1296,11 @@ int gfs2_logd(void *data)
 {
        struct gfs2_sbd *sdp = data;
        unsigned long t = 1;
-       DEFINE_WAIT(wait);
 
        while (!kthread_should_stop()) {
+               if (gfs2_withdrawn(sdp))
+                       break;
 
-               if (gfs2_withdrawn(sdp)) {
-                       msleep_interruptible(HZ);
-                       continue;
-               }
                /* Check for errors writing to the journal */
                if (sdp->sd_log_error) {
                        gfs2_lm(sdp,
@@ -1317,7 +1309,7 @@ int gfs2_logd(void *data)
                                "prevent further damage.\n",
                                sdp->sd_fsname, sdp->sd_log_error);
                        gfs2_withdraw(sdp);
-                       continue;
+                       break;
                }
 
                if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
@@ -1326,7 +1318,9 @@ int gfs2_logd(void *data)
                                                  GFS2_LFC_LOGD_JFLUSH_REQD);
                }
 
-               if (gfs2_ail_flush_reqd(sdp)) {
+               if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+                   gfs2_ail_flush_reqd(sdp)) {
+                       clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
                        gfs2_ail1_start(sdp);
                        gfs2_ail1_wait(sdp);
                        gfs2_ail1_empty(sdp, 0);
@@ -1338,17 +1332,14 @@ int gfs2_logd(void *data)
 
                try_to_freeze();
 
-               do {
-                       prepare_to_wait(&sdp->sd_logd_waitq, &wait,
-                                       TASK_INTERRUPTIBLE);
-                       if (!gfs2_ail_flush_reqd(sdp) &&
-                           !gfs2_jrnl_flush_reqd(sdp) &&
-                           !kthread_should_stop())
-                               t = schedule_timeout(t);
-               } while(t && !gfs2_ail_flush_reqd(sdp) &&
-                       !gfs2_jrnl_flush_reqd(sdp) &&
-                       !kthread_should_stop());
-               finish_wait(&sdp->sd_logd_waitq, &wait);
+               t = wait_event_interruptible_timeout(sdp->sd_logd_waitq,
+                               test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+                               gfs2_ail_flush_reqd(sdp) ||
+                               gfs2_jrnl_flush_reqd(sdp) ||
+                               sdp->sd_log_error ||
+                               gfs2_withdrawn(sdp) ||
+                               kthread_should_stop(),
+                               t);
        }
 
        return 0;
index 251322b..483f698 100644 (file)
@@ -456,7 +456,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
  * Find the folio with 'index' in the journal's mapping. Search the folio for
  * the journal head if requested (cleanup == false). Release refs on the
  * folio so the page cache can reclaim it. We grabbed a
- * reference on this folio twice, first when we did a find_or_create_page()
+ * reference on this folio twice, first when we did a grab_cache_page()
  * to obtain the folio to add it to the bio and second when we do a
  * filemap_get_folio() here to get the folio to wait on while I/O on it is being
  * completed.
@@ -481,7 +481,7 @@ static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
        if (!*done)
                *done = gfs2_jhead_pg_srch(jd, head, &folio->page);
 
-       /* filemap_get_folio() and the earlier find_or_create_page() */
+       /* filemap_get_folio() and the earlier grab_cache_page() */
        folio_put_refs(folio, 2);
 }
 
@@ -535,8 +535,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
 
                for (; block < je->lblock + je->blocks; block++, dblock++) {
                        if (!page) {
-                               page = find_or_create_page(mapping,
-                                               block >> shift, GFP_NOFS);
+                               page = grab_cache_page(mapping, block >> shift);
                                if (!page) {
                                        ret = -ENOMEM;
                                        done = true;
index afcb328..66eb98b 100644 (file)
@@ -152,9 +152,9 @@ static int __init init_gfs2_fs(void)
                goto fail_shrinker;
 
        error = -ENOMEM;
-       gfs_recovery_wq = alloc_workqueue("gfs_recovery",
+       gfs2_recovery_wq = alloc_workqueue("gfs2_recovery",
                                          WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
-       if (!gfs_recovery_wq)
+       if (!gfs2_recovery_wq)
                goto fail_wq1;
 
        gfs2_control_wq = alloc_workqueue("gfs2_control",
@@ -162,7 +162,7 @@ static int __init init_gfs2_fs(void)
        if (!gfs2_control_wq)
                goto fail_wq2;
 
-       gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0);
+       gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", 0, 0);
 
        if (!gfs2_freeze_wq)
                goto fail_wq3;
@@ -194,7 +194,7 @@ fail_mempool:
 fail_wq3:
        destroy_workqueue(gfs2_control_wq);
 fail_wq2:
-       destroy_workqueue(gfs_recovery_wq);
+       destroy_workqueue(gfs2_recovery_wq);
 fail_wq1:
        unregister_shrinker(&gfs2_qd_shrinker);
 fail_shrinker:
@@ -234,7 +234,7 @@ static void __exit exit_gfs2_fs(void)
        gfs2_unregister_debugfs();
        unregister_filesystem(&gfs2_fs_type);
        unregister_filesystem(&gfs2meta_fs_type);
-       destroy_workqueue(gfs_recovery_wq);
+       destroy_workqueue(gfs2_recovery_wq);
        destroy_workqueue(gfs2_control_wq);
        destroy_workqueue(gfs2_freeze_wq);
        list_lru_destroy(&gfs2_qd_lru);
index 8a27957..33ca047 100644 (file)
@@ -87,7 +87,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
        set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
        gfs2_tune_init(&sdp->sd_tune);
 
-       init_waitqueue_head(&sdp->sd_glock_wait);
+       init_waitqueue_head(&sdp->sd_kill_wait);
        init_waitqueue_head(&sdp->sd_async_glock_wait);
        atomic_set(&sdp->sd_glock_disposal, 0);
        init_completion(&sdp->sd_locking_init);
@@ -1103,29 +1103,49 @@ static int init_threads(struct gfs2_sbd *sdp)
        struct task_struct *p;
        int error = 0;
 
-       p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
+       p = kthread_create(gfs2_logd, sdp, "gfs2_logd/%s", sdp->sd_fsname);
        if (IS_ERR(p)) {
                error = PTR_ERR(p);
-               fs_err(sdp, "can't start logd thread: %d\n", error);
+               fs_err(sdp, "can't create logd thread: %d\n", error);
                return error;
        }
+       get_task_struct(p);
        sdp->sd_logd_process = p;
 
-       p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
+       p = kthread_create(gfs2_quotad, sdp, "gfs2_quotad/%s", sdp->sd_fsname);
        if (IS_ERR(p)) {
                error = PTR_ERR(p);
-               fs_err(sdp, "can't start quotad thread: %d\n", error);
+               fs_err(sdp, "can't create quotad thread: %d\n", error);
                goto fail;
        }
+       get_task_struct(p);
        sdp->sd_quotad_process = p;
+
+       wake_up_process(sdp->sd_logd_process);
+       wake_up_process(sdp->sd_quotad_process);
        return 0;
 
 fail:
        kthread_stop(sdp->sd_logd_process);
+       put_task_struct(sdp->sd_logd_process);
        sdp->sd_logd_process = NULL;
        return error;
 }
 
+void gfs2_destroy_threads(struct gfs2_sbd *sdp)
+{
+       if (sdp->sd_logd_process) {
+               kthread_stop(sdp->sd_logd_process);
+               put_task_struct(sdp->sd_logd_process);
+               sdp->sd_logd_process = NULL;
+       }
+       if (sdp->sd_quotad_process) {
+               kthread_stop(sdp->sd_quotad_process);
+               put_task_struct(sdp->sd_quotad_process);
+               sdp->sd_quotad_process = NULL;
+       }
+}
+
 /**
  * gfs2_fill_super - Read in superblock
  * @sb: The VFS superblock
@@ -1276,12 +1296,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
 
        if (error) {
                gfs2_freeze_unlock(&sdp->sd_freeze_gh);
-               if (sdp->sd_quotad_process)
-                       kthread_stop(sdp->sd_quotad_process);
-               sdp->sd_quotad_process = NULL;
-               if (sdp->sd_logd_process)
-                       kthread_stop(sdp->sd_logd_process);
-               sdp->sd_logd_process = NULL;
+               gfs2_destroy_threads(sdp);
                fs_err(sdp, "can't make FS RW: %d\n", error);
                goto fail_per_node;
        }
@@ -1381,6 +1396,7 @@ static const struct constant_table gfs2_param_quota[] = {
        {"off",        GFS2_QUOTA_OFF},
        {"account",    GFS2_QUOTA_ACCOUNT},
        {"on",         GFS2_QUOTA_ON},
+       {"quiet",      GFS2_QUOTA_QUIET},
        {}
 };
 
@@ -1786,9 +1802,9 @@ static void gfs2_kill_sb(struct super_block *sb)
        /*
         * Flush and then drain the delete workqueue here (via
         * destroy_workqueue()) to ensure that any delete work that
-        * may be running will also see the SDF_DEACTIVATING flag.
+        * may be running will also see the SDF_KILL flag.
         */
-       set_bit(SDF_DEACTIVATING, &sdp->sd_flags);
+       set_bit(SDF_KILL, &sdp->sd_flags);
        gfs2_flush_delete_work(sdp);
        destroy_workqueue(sdp->sd_delete_wq);
 
index aa5fd06..171b271 100644 (file)
@@ -109,38 +109,44 @@ static inline void spin_unlock_bucket(unsigned int hash)
 static void gfs2_qd_dealloc(struct rcu_head *rcu)
 {
        struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu);
+       struct gfs2_sbd *sdp = qd->qd_sbd;
+
        kmem_cache_free(gfs2_quotad_cachep, qd);
+       if (atomic_dec_and_test(&sdp->sd_quota_count))
+               wake_up(&sdp->sd_kill_wait);
 }
 
-static void gfs2_qd_dispose(struct list_head *list)
+static void gfs2_qd_dispose(struct gfs2_quota_data *qd)
 {
-       struct gfs2_quota_data *qd;
-       struct gfs2_sbd *sdp;
-
-       while (!list_empty(list)) {
-               qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
-               sdp = qd->qd_gl->gl_name.ln_sbd;
-
-               list_del(&qd->qd_lru);
+       struct gfs2_sbd *sdp = qd->qd_sbd;
 
-               /* Free from the filesystem-specific list */
-               spin_lock(&qd_lock);
-               list_del(&qd->qd_list);
-               spin_unlock(&qd_lock);
+       spin_lock(&qd_lock);
+       list_del(&qd->qd_list);
+       spin_unlock(&qd_lock);
 
-               spin_lock_bucket(qd->qd_hash);
-               hlist_bl_del_rcu(&qd->qd_hlist);
-               spin_unlock_bucket(qd->qd_hash);
+       spin_lock_bucket(qd->qd_hash);
+       hlist_bl_del_rcu(&qd->qd_hlist);
+       spin_unlock_bucket(qd->qd_hash);
 
+       if (!gfs2_withdrawn(sdp)) {
                gfs2_assert_warn(sdp, !qd->qd_change);
-               gfs2_assert_warn(sdp, !qd->qd_slot_count);
+               gfs2_assert_warn(sdp, !qd->qd_slot_ref);
                gfs2_assert_warn(sdp, !qd->qd_bh_count);
+       }
 
-               gfs2_glock_put(qd->qd_gl);
-               atomic_dec(&sdp->sd_quota_count);
+       gfs2_glock_put(qd->qd_gl);
+       call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
+}
 
-               /* Delete it from the common reclaim list */
-               call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
+static void gfs2_qd_list_dispose(struct list_head *list)
+{
+       struct gfs2_quota_data *qd;
+
+       while (!list_empty(list)) {
+               qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
+               list_del(&qd->qd_lru);
+
+               gfs2_qd_dispose(qd);
        }
 }
 
@@ -149,18 +155,22 @@ static enum lru_status gfs2_qd_isolate(struct list_head *item,
                struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
 {
        struct list_head *dispose = arg;
-       struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru);
+       struct gfs2_quota_data *qd =
+               list_entry(item, struct gfs2_quota_data, qd_lru);
+       enum lru_status status;
 
        if (!spin_trylock(&qd->qd_lockref.lock))
                return LRU_SKIP;
 
+       status = LRU_SKIP;
        if (qd->qd_lockref.count == 0) {
                lockref_mark_dead(&qd->qd_lockref);
                list_lru_isolate_move(lru, &qd->qd_lru, dispose);
+               status = LRU_REMOVED;
        }
 
        spin_unlock(&qd->qd_lockref.lock);
-       return LRU_REMOVED;
+       return status;
 }
 
 static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
@@ -175,7 +185,7 @@ static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
        freed = list_lru_shrink_walk(&gfs2_qd_lru, sc,
                                     gfs2_qd_isolate, &dispose);
 
-       gfs2_qd_dispose(&dispose);
+       gfs2_qd_list_dispose(&dispose);
 
        return freed;
 }
@@ -203,12 +213,7 @@ static u64 qd2index(struct gfs2_quota_data *qd)
 
 static u64 qd2offset(struct gfs2_quota_data *qd)
 {
-       u64 offset;
-
-       offset = qd2index(qd);
-       offset *= sizeof(struct gfs2_quota);
-
-       return offset;
+       return qd2index(qd) * sizeof(struct gfs2_quota);
 }
 
 static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid)
@@ -221,7 +226,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str
                return NULL;
 
        qd->qd_sbd = sdp;
-       qd->qd_lockref.count = 1;
+       qd->qd_lockref.count = 0;
        spin_lock_init(&qd->qd_lockref.lock);
        qd->qd_id = qid;
        qd->qd_slot = -1;
@@ -283,6 +288,7 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
        spin_lock_bucket(hash);
        *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
        if (qd == NULL) {
+               new_qd->qd_lockref.count++;
                *qdp = new_qd;
                list_add(&new_qd->qd_list, &sdp->sd_quota_list);
                hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]);
@@ -302,20 +308,31 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
 
 static void qd_hold(struct gfs2_quota_data *qd)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref));
        lockref_get(&qd->qd_lockref);
 }
 
 static void qd_put(struct gfs2_quota_data *qd)
 {
+       struct gfs2_sbd *sdp;
+
        if (lockref_put_or_lock(&qd->qd_lockref))
                return;
 
+       BUG_ON(__lockref_is_dead(&qd->qd_lockref));
+       sdp = qd->qd_sbd;
+       if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
+               lockref_mark_dead(&qd->qd_lockref);
+               spin_unlock(&qd->qd_lockref.lock);
+
+               gfs2_qd_dispose(qd);
+               return;
+       }
+
        qd->qd_lockref.count = 0;
        list_lru_add(&gfs2_qd_lru, &qd->qd_lru);
        spin_unlock(&qd->qd_lockref.lock);
-
 }
 
 static int slot_get(struct gfs2_quota_data *qd)
@@ -325,20 +342,19 @@ static int slot_get(struct gfs2_quota_data *qd)
        int error = 0;
 
        spin_lock(&sdp->sd_bitmap_lock);
-       if (qd->qd_slot_count != 0)
-               goto out;
-
-       error = -ENOSPC;
-       bit = find_first_zero_bit(sdp->sd_quota_bitmap, sdp->sd_quota_slots);
-       if (bit < sdp->sd_quota_slots) {
+       if (qd->qd_slot_ref == 0) {
+               bit = find_first_zero_bit(sdp->sd_quota_bitmap,
+                                         sdp->sd_quota_slots);
+               if (bit >= sdp->sd_quota_slots) {
+                       error = -ENOSPC;
+                       goto out;
+               }
                set_bit(bit, sdp->sd_quota_bitmap);
                qd->qd_slot = bit;
-               error = 0;
-out:
-               qd->qd_slot_count++;
        }
+       qd->qd_slot_ref++;
+out:
        spin_unlock(&sdp->sd_bitmap_lock);
-
        return error;
 }
 
@@ -347,8 +363,8 @@ static void slot_hold(struct gfs2_quota_data *qd)
        struct gfs2_sbd *sdp = qd->qd_sbd;
 
        spin_lock(&sdp->sd_bitmap_lock);
-       gfs2_assert(sdp, qd->qd_slot_count);
-       qd->qd_slot_count++;
+       gfs2_assert(sdp, qd->qd_slot_ref);
+       qd->qd_slot_ref++;
        spin_unlock(&sdp->sd_bitmap_lock);
 }
 
@@ -357,8 +373,8 @@ static void slot_put(struct gfs2_quota_data *qd)
        struct gfs2_sbd *sdp = qd->qd_sbd;
 
        spin_lock(&sdp->sd_bitmap_lock);
-       gfs2_assert(sdp, qd->qd_slot_count);
-       if (!--qd->qd_slot_count) {
+       gfs2_assert(sdp, qd->qd_slot_ref);
+       if (!--qd->qd_slot_ref) {
                BUG_ON(!test_and_clear_bit(qd->qd_slot, sdp->sd_quota_bitmap));
                qd->qd_slot = -1;
        }
@@ -367,7 +383,7 @@ static void slot_put(struct gfs2_quota_data *qd)
 
 static int bh_get(struct gfs2_quota_data *qd)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        struct inode *inode = sdp->sd_qc_inode;
        struct gfs2_inode *ip = GFS2_I(inode);
        unsigned int block, offset;
@@ -421,7 +437,7 @@ fail:
 
 static void bh_put(struct gfs2_quota_data *qd)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
 
        mutex_lock(&sdp->sd_quota_mutex);
        gfs2_assert(sdp, qd->qd_bh_count);
@@ -451,6 +467,20 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd,
        return 1;
 }
 
+static int qd_bh_get_or_undo(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
+{
+       int error;
+
+       error = bh_get(qd);
+       if (!error)
+               return 0;
+
+       clear_bit(QDF_LOCKED, &qd->qd_flags);
+       slot_put(qd);
+       qd_put(qd);
+       return error;
+}
+
 static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
 {
        struct gfs2_quota_data *qd = NULL, *iter;
@@ -473,30 +503,29 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
        spin_unlock(&qd_lock);
 
        if (qd) {
-               error = bh_get(qd);
-               if (error) {
-                       clear_bit(QDF_LOCKED, &qd->qd_flags);
-                       slot_put(qd);
-                       qd_put(qd);
+               error = qd_bh_get_or_undo(sdp, qd);
+               if (error)
                        return error;
-               }
+               *qdp = qd;
        }
 
-       *qdp = qd;
-
        return 0;
 }
 
-static void qd_unlock(struct gfs2_quota_data *qd)
+static void qdsb_put(struct gfs2_quota_data *qd)
 {
-       gfs2_assert_warn(qd->qd_gl->gl_name.ln_sbd,
-                        test_bit(QDF_LOCKED, &qd->qd_flags));
-       clear_bit(QDF_LOCKED, &qd->qd_flags);
        bh_put(qd);
        slot_put(qd);
        qd_put(qd);
 }
 
+static void qd_unlock(struct gfs2_quota_data *qd)
+{
+       gfs2_assert_warn(qd->qd_sbd, test_bit(QDF_LOCKED, &qd->qd_flags));
+       clear_bit(QDF_LOCKED, &qd->qd_flags);
+       qdsb_put(qd);
+}
+
 static int qdsb_get(struct gfs2_sbd *sdp, struct kqid qid,
                    struct gfs2_quota_data **qdp)
 {
@@ -523,13 +552,6 @@ fail:
        return error;
 }
 
-static void qdsb_put(struct gfs2_quota_data *qd)
-{
-       bh_put(qd);
-       slot_put(qd);
-       qd_put(qd);
-}
-
 /**
  * gfs2_qa_get - make sure we have a quota allocations data structure,
  *               if necessary
@@ -666,7 +688,7 @@ static int sort_qd(const void *a, const void *b)
 
 static void do_qc(struct gfs2_quota_data *qd, s64 change, int qc_type)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
        struct gfs2_quota_change *qc = qd->qd_bh_qc;
        s64 x;
@@ -708,30 +730,29 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change, int qc_type)
        mutex_unlock(&sdp->sd_quota_mutex);
 }
 
-static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
+static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index,
                                  unsigned off, void *buf, unsigned bytes)
 {
+       struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
        struct inode *inode = &ip->i_inode;
-       struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
        struct buffer_head *bh;
        u64 blk;
        unsigned bsize = sdp->sd_sb.sb_bsize, bnum = 0, boff = 0;
        unsigned to_write = bytes, pg_off = off;
-       int done = 0;
 
        blk = index << (PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift);
        boff = off % bsize;
 
-       page = find_or_create_page(mapping, index, GFP_NOFS);
+       page = grab_cache_page(mapping, index);
        if (!page)
                return -ENOMEM;
        if (!page_has_buffers(page))
                create_empty_buffers(page, bsize, 0);
 
        bh = page_buffers(page);
-       while (!done) {
+       for(;;) {
                /* Find the beginning block within the page */
                if (pg_off >= ((bnum * bsize) + bsize)) {
                        bh = bh->b_this_page;
@@ -751,10 +772,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
                        set_buffer_uptodate(bh);
                if (bh_read(bh, REQ_META | REQ_PRIO) < 0)
                        goto unlock_out;
-               if (gfs2_is_jdata(ip))
-                       gfs2_trans_add_data(ip->i_gl, bh);
-               else
-                       gfs2_ordered_add_inode(ip);
+               gfs2_trans_add_data(ip->i_gl, bh);
 
                /* If we need to write to the next block as well */
                if (to_write > (bsize - boff)) {
@@ -763,7 +781,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
                        boff = pg_off % bsize;
                        continue;
                }
-               done = 1;
+               break;
        }
 
        /* Write to the page, now that we have setup the buffer(s) */
@@ -780,12 +798,12 @@ unlock_out:
        return -EIO;
 }
 
-static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
+static int gfs2_write_disk_quota(struct gfs2_sbd *sdp, struct gfs2_quota *qp,
                                 loff_t loc)
 {
        unsigned long pg_beg;
        unsigned pg_off, nbytes, overflow = 0;
-       int pg_oflow = 0, error;
+       int error;
        void *ptr;
 
        nbytes = sizeof(struct gfs2_quota);
@@ -794,17 +812,15 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
        pg_off = offset_in_page(loc);
 
        /* If the quota straddles a page boundary, split the write in two */
-       if ((pg_off + nbytes) > PAGE_SIZE) {
-               pg_oflow = 1;
+       if ((pg_off + nbytes) > PAGE_SIZE)
                overflow = (pg_off + nbytes) - PAGE_SIZE;
-       }
 
        ptr = qp;
-       error = gfs2_write_buf_to_page(ip, pg_beg, pg_off, ptr,
+       error = gfs2_write_buf_to_page(sdp, pg_beg, pg_off, ptr,
                                       nbytes - overflow);
        /* If there's an overflow, write the remaining bytes to the next page */
-       if (!error && pg_oflow)
-               error = gfs2_write_buf_to_page(ip, pg_beg + 1, 0,
+       if (!error && overflow)
+               error = gfs2_write_buf_to_page(sdp, pg_beg + 1, 0,
                                               ptr + nbytes - overflow,
                                               overflow);
        return error;
@@ -812,7 +828,7 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
 
 /**
  * gfs2_adjust_quota - adjust record of current block usage
- * @ip: The quota inode
+ * @sdp: The superblock
  * @loc: Offset of the entry in the quota file
  * @change: The amount of usage change to record
  * @qd: The quota data
@@ -824,12 +840,12 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
  * Returns: 0 or -ve on error
  */
 
-static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
+static int gfs2_adjust_quota(struct gfs2_sbd *sdp, loff_t loc,
                             s64 change, struct gfs2_quota_data *qd,
                             struct qc_dqblk *fdq)
 {
+       struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
        struct inode *inode = &ip->i_inode;
-       struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_quota q;
        int err;
        u64 size;
@@ -846,7 +862,6 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
                return err;
 
        loc -= sizeof(q); /* gfs2_internal_read would've advanced the loc ptr */
-       err = -EIO;
        be64_add_cpu(&q.qu_value, change);
        if (((s64)be64_to_cpu(q.qu_value)) < 0)
                q.qu_value = 0; /* Never go negative on quota usage */
@@ -866,7 +881,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
                }
        }
 
-       err = gfs2_write_disk_quota(ip, &q, loc);
+       err = gfs2_write_disk_quota(sdp, &q, loc);
        if (!err) {
                size = loc + sizeof(struct gfs2_quota);
                if (size > inode->i_size)
@@ -881,7 +896,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 
 static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 {
-       struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = (*qda)->qd_sbd;
        struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
        struct gfs2_alloc_parms ap = { .aflags = 0, };
        unsigned int data_blocks, ind_blocks;
@@ -893,18 +908,12 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
        unsigned int nalloc = 0, blocks;
        int error;
 
-       error = gfs2_qa_get(ip);
-       if (error)
-               return error;
-
        gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
                              &data_blocks, &ind_blocks);
 
        ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
-       if (!ghs) {
-               error = -ENOMEM;
-               goto out;
-       }
+       if (!ghs)
+               return -ENOMEM;
 
        sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
        inode_lock(&ip->i_inode);
@@ -953,7 +962,8 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
        for (x = 0; x < num_qd; x++) {
                qd = qda[x];
                offset = qd2offset(qd);
-               error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL);
+               error = gfs2_adjust_quota(sdp, offset, qd->qd_change_sync, qd,
+                                                       NULL);
                if (error)
                        goto out_end_trans;
 
@@ -961,8 +971,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                set_bit(QDF_REFRESH, &qd->qd_flags);
        }
 
-       error = 0;
-
 out_end_trans:
        gfs2_trans_end(sdp);
 out_ipres:
@@ -976,8 +984,10 @@ out_dq:
        kfree(ghs);
        gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl,
                       GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC);
-out:
-       gfs2_qa_put(ip);
+       if (!error) {
+               for (x = 0; x < num_qd; x++)
+                       qda[x]->qd_sync_gen = sdp->sd_quota_sync_gen;
+       }
        return error;
 }
 
@@ -1009,11 +1019,12 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
 static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
                    struct gfs2_holder *q_gh)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
        struct gfs2_holder i_gh;
        int error;
 
+       gfs2_assert_warn(sdp, sdp == qd->qd_gl->gl_name.ln_sbd);
 restart:
        error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
        if (error)
@@ -1059,9 +1070,10 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_quota_data *qd;
        u32 x;
-       int error = 0;
+       int error;
 
-       if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
+       if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON &&
+           sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET)
                return 0;
 
        error = gfs2_quota_hold(ip, uid, gid);
@@ -1089,16 +1101,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
        return error;
 }
 
-static int need_sync(struct gfs2_quota_data *qd)
+static bool need_sync(struct gfs2_quota_data *qd)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        struct gfs2_tune *gt = &sdp->sd_tune;
        s64 value;
        unsigned int num, den;
-       int do_sync = 1;
 
        if (!qd->qd_qb.qb_limit)
-               return 0;
+               return false;
 
        spin_lock(&qd_lock);
        value = qd->qd_change;
@@ -1109,26 +1120,26 @@ static int need_sync(struct gfs2_quota_data *qd)
        den = gt->gt_quota_scale_den;
        spin_unlock(&gt->gt_spin);
 
-       if (value < 0)
-               do_sync = 0;
+       if (value <= 0)
+               return false;
        else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >=
                 (s64)be64_to_cpu(qd->qd_qb.qb_limit))
-               do_sync = 0;
+               return false;
        else {
                value *= gfs2_jindex_size(sdp) * num;
                value = div_s64(value, den);
                value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
                if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
-                       do_sync = 0;
+                       return false;
        }
 
-       return do_sync;
+       return true;
 }
 
 void gfs2_quota_unlock(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_quota_data *qda[4];
+       struct gfs2_quota_data *qda[2 * GFS2_MAXQUOTAS];
        unsigned int count = 0;
        u32 x;
        int found;
@@ -1138,7 +1149,7 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
 
        for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
                struct gfs2_quota_data *qd;
-               int sync;
+               bool sync;
 
                qd = ip->i_qadata->qa_qd[x];
                sync = need_sync(qd);
@@ -1154,15 +1165,8 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
                if (!found)
                        continue;
 
-               gfs2_assert_warn(sdp, qd->qd_change_sync);
-               if (bh_get(qd)) {
-                       clear_bit(QDF_LOCKED, &qd->qd_flags);
-                       slot_put(qd);
-                       qd_put(qd);
-                       continue;
-               }
-
-               qda[count++] = qd;
+               if (!qd_bh_get_or_undo(sdp, qd))
+                       qda[count++] = qd;
        }
 
        if (count) {
@@ -1178,12 +1182,13 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
 
 static int print_message(struct gfs2_quota_data *qd, char *type)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
 
-       fs_info(sdp, "quota %s for %s %u\n",
-               type,
-               (qd->qd_id.type == USRQUOTA) ? "user" : "group",
-               from_kqid(&init_user_ns, qd->qd_id));
+       if (sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET)
+               fs_info(sdp, "quota %s for %s %u\n",
+                       type,
+                       (qd->qd_id.type == USRQUOTA) ? "user" : "group",
+                       from_kqid(&init_user_ns, qd->qd_id));
 
        return 0;
 }
@@ -1269,7 +1274,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        u32 x;
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 
-       if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON ||
+       if ((sdp->sd_args.ar_quota != GFS2_QUOTA_ON &&
+           sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) ||
            gfs2_assert_warn(sdp, change))
                return;
        if (ip->i_diskflags & GFS2_DIF_SYSTEM)
@@ -1288,6 +1294,24 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        }
 }
 
+static bool qd_changed(struct gfs2_sbd *sdp)
+{
+       struct gfs2_quota_data *qd;
+       bool changed = false;
+
+       spin_lock(&qd_lock);
+       list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
+               if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
+                   !test_bit(QDF_CHANGE, &qd->qd_flags))
+                       continue;
+
+               changed = true;
+               break;
+       }
+       spin_unlock(&qd_lock);
+       return changed;
+}
+
 int gfs2_quota_sync(struct super_block *sb, int type)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
@@ -1297,6 +1321,9 @@ int gfs2_quota_sync(struct super_block *sb, int type)
        unsigned int x;
        int error = 0;
 
+       if (!qd_changed(sdp))
+               return 0;
+
        qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
        if (!qda)
                return -ENOMEM;
@@ -1318,10 +1345,6 @@ int gfs2_quota_sync(struct super_block *sb, int type)
                if (num_qd) {
                        if (!error)
                                error = do_sync(num_qd, qda);
-                       if (!error)
-                               for (x = 0; x < num_qd; x++)
-                                       qda[x]->qd_sync_gen =
-                                               sdp->sd_quota_sync_gen;
 
                        for (x = 0; x < num_qd; x++)
                                qd_unlock(qda[x]);
@@ -1423,7 +1446,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
                        set_bit(QDF_CHANGE, &qd->qd_flags);
                        qd->qd_change = qc_change;
                        qd->qd_slot = slot;
-                       qd->qd_slot_count = 1;
+                       qd->qd_slot_ref = 1;
 
                        spin_lock(&qd_lock);
                        BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap));
@@ -1455,36 +1478,35 @@ fail:
 
 void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
 {
-       struct list_head *head = &sdp->sd_quota_list;
        struct gfs2_quota_data *qd;
+       LIST_HEAD(dispose);
+       int count;
 
-       spin_lock(&qd_lock);
-       while (!list_empty(head)) {
-               qd = list_last_entry(head, struct gfs2_quota_data, qd_list);
+       BUG_ON(test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
 
-               list_del(&qd->qd_list);
+       spin_lock(&qd_lock);
+       list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
+               spin_lock(&qd->qd_lockref.lock);
+               if (qd->qd_lockref.count != 0) {
+                       spin_unlock(&qd->qd_lockref.lock);
+                       continue;
+               }
+               lockref_mark_dead(&qd->qd_lockref);
+               spin_unlock(&qd->qd_lockref.lock);
 
-               /* Also remove if this qd exists in the reclaim list */
                list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
-               atomic_dec(&sdp->sd_quota_count);
-               spin_unlock(&qd_lock);
-
-               spin_lock_bucket(qd->qd_hash);
-               hlist_bl_del_rcu(&qd->qd_hlist);
-               spin_unlock_bucket(qd->qd_hash);
-
-               gfs2_assert_warn(sdp, !qd->qd_change);
-               gfs2_assert_warn(sdp, !qd->qd_slot_count);
-               gfs2_assert_warn(sdp, !qd->qd_bh_count);
-
-               gfs2_glock_put(qd->qd_gl);
-               call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
-
-               spin_lock(&qd_lock);
+               list_add(&qd->qd_lru, &dispose);
        }
        spin_unlock(&qd_lock);
 
-       gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
+       gfs2_qd_list_dispose(&dispose);
+
+       wait_event_timeout(sdp->sd_kill_wait,
+               (count = atomic_read(&sdp->sd_quota_count)) == 0,
+               HZ * 60);
+
+       if (count != 0)
+               fs_err(sdp, "%d left-over quota data objects\n", count);
 
        kvfree(sdp->sd_quota_bitmap);
        sdp->sd_quota_bitmap = NULL;
@@ -1536,12 +1558,11 @@ int gfs2_quotad(void *data)
        unsigned long statfs_timeo = 0;
        unsigned long quotad_timeo = 0;
        unsigned long t = 0;
-       DEFINE_WAIT(wait);
 
        while (!kthread_should_stop()) {
-
                if (gfs2_withdrawn(sdp))
-                       goto bypass;
+                       break;
+
                /* Update the master statfs file */
                if (sdp->sd_statfs_force_sync) {
                        int error = gfs2_statfs_sync(sdp->sd_vfs, 0);
@@ -1559,15 +1580,16 @@ int gfs2_quotad(void *data)
 
                try_to_freeze();
 
-bypass:
                t = min(quotad_timeo, statfs_timeo);
 
-               prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
-               if (!sdp->sd_statfs_force_sync)
-                       t -= schedule_timeout(t);
-               else
+               t = wait_event_interruptible_timeout(sdp->sd_quota_wait,
+                               sdp->sd_statfs_force_sync ||
+                               gfs2_withdrawn(sdp) ||
+                               kthread_should_stop(),
+                               t);
+
+               if (sdp->sd_statfs_force_sync)
                        t = 0;
-               finish_wait(&sdp->sd_quota_wait, &wait);
        }
 
        return 0;
@@ -1580,6 +1602,8 @@ static int gfs2_quota_get_state(struct super_block *sb, struct qc_state *state)
        memset(state, 0, sizeof(*state));
 
        switch (sdp->sd_args.ar_quota) {
+       case GFS2_QUOTA_QUIET:
+               fallthrough;
        case GFS2_QUOTA_ON:
                state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
                state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
@@ -1726,7 +1750,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
                goto out_release;
 
        /* Apply changes */
-       error = gfs2_adjust_quota(ip, offset, 0, qd, fdq);
+       error = gfs2_adjust_quota(sdp, offset, 0, qd, fdq);
        if (!error)
                clear_bit(QDF_QMSG_QUIET, &qd->qd_flags);
 
index 9c7a9f6..5aae026 100644 (file)
@@ -27,7 +27,7 @@
 #include "util.h"
 #include "dir.h"
 
-struct workqueue_struct *gfs_recovery_wq;
+struct workqueue_struct *gfs2_recovery_wq;
 
 int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
                           struct buffer_head **bh)
@@ -570,7 +570,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
                return -EBUSY;
 
        /* we have JDF_RECOVERY, queue should always succeed */
-       rv = queue_work(gfs_recovery_wq, &jd->jd_work);
+       rv = queue_work(gfs2_recovery_wq, &jd->jd_work);
        BUG_ON(!rv);
 
        if (wait)
index 0d30f8e..7a0c9d0 100644 (file)
@@ -9,7 +9,7 @@
 
 #include "incore.h"
 
-extern struct workqueue_struct *gfs_recovery_wq;
+extern struct workqueue_struct *gfs2_recovery_wq;
 
 static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, u32 *blk)
 {
index 2f70133..02d93da 100644 (file)
@@ -546,20 +546,10 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
        int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 
-       if (!test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+       if (!test_bit(SDF_KILL, &sdp->sd_flags))
                gfs2_flush_delete_work(sdp);
 
-       if (!log_write_allowed && current == sdp->sd_quotad_process)
-               fs_warn(sdp, "The quotad daemon is withdrawing.\n");
-       else if (sdp->sd_quotad_process)
-               kthread_stop(sdp->sd_quotad_process);
-       sdp->sd_quotad_process = NULL;
-
-       if (!log_write_allowed && current == sdp->sd_logd_process)
-               fs_warn(sdp, "The logd daemon is withdrawing.\n");
-       else if (sdp->sd_logd_process)
-               kthread_stop(sdp->sd_logd_process);
-       sdp->sd_logd_process = NULL;
+       gfs2_destroy_threads(sdp);
 
        if (log_write_allowed) {
                gfs2_quota_sync(sdp->sd_vfs, 0);
@@ -580,15 +570,8 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
                                   gfs2_log_is_empty(sdp),
                                   HZ * 5);
                gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
-       } else {
-               wait_event_timeout(sdp->sd_log_waitq,
-                                  gfs2_log_is_empty(sdp),
-                                  HZ * 5);
        }
        gfs2_quota_cleanup(sdp);
-
-       if (!log_write_allowed)
-               sdp->sd_vfs->s_flags |= SB_RDONLY;
 }
 
 /**
@@ -622,6 +605,10 @@ restart:
        if (!sb_rdonly(sb)) {
                gfs2_make_fs_ro(sdp);
        }
+       if (gfs2_withdrawn(sdp)) {
+               gfs2_destroy_threads(sdp);
+               gfs2_quota_cleanup(sdp);
+       }
        WARN_ON(gfs2_withdrawing(sdp));
 
        /*  At this point, we're through modifying the disk  */
@@ -1134,6 +1121,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
                case GFS2_QUOTA_ON:
                        state = "on";
                        break;
+               case GFS2_QUOTA_QUIET:
+                       state = "quiet";
+                       break;
                default:
                        state = "unknown";
                        break;
index bba5862..ab9c831 100644 (file)
@@ -36,6 +36,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
 extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
 extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp);
 extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
+extern void gfs2_destroy_threads(struct gfs2_sbd *sdp);
 extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
 extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
                               s64 dinodes);
index c60bc7f..60a0206 100644 (file)
@@ -98,7 +98,10 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
                     "sd_log_flush_head:        %d\n"
                     "sd_log_flush_tail:        %d\n"
                     "sd_log_blks_reserved:     %d\n"
-                    "sd_log_revokes_available: %d\n",
+                    "sd_log_revokes_available: %d\n"
+                    "sd_log_pinned:            %d\n"
+                    "sd_log_thresh1:           %d\n"
+                    "sd_log_thresh2:           %d\n",
                     test_bit(SDF_JOURNAL_CHECKED, &f),
                     test_bit(SDF_JOURNAL_LIVE, &f),
                     (sdp->sd_jdesc ? sdp->sd_jdesc->jd_jid : 0),
@@ -118,7 +121,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
                     test_bit(SDF_WITHDRAW_IN_PROG, &f),
                     test_bit(SDF_REMOTE_WITHDRAW, &f),
                     test_bit(SDF_WITHDRAW_RECOVERY, &f),
-                    test_bit(SDF_DEACTIVATING, &f),
+                    test_bit(SDF_KILL, &f),
                     sdp->sd_log_error,
                     rwsem_is_locked(&sdp->sd_log_flush_lock),
                     sdp->sd_log_num_revoke,
@@ -128,7 +131,10 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
                     sdp->sd_log_flush_head,
                     sdp->sd_log_flush_tail,
                     sdp->sd_log_blks_reserved,
-                    atomic_read(&sdp->sd_log_revokes_available));
+                    atomic_read(&sdp->sd_log_revokes_available),
+                    atomic_read(&sdp->sd_log_pinned),
+                    atomic_read(&sdp->sd_log_thresh1),
+                    atomic_read(&sdp->sd_log_thresh2));
        return s;
 }
 
index dac22b1..da29faf 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
+#include <linux/kthread.h>
 #include <linux/crc32.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/delay.h>
@@ -150,7 +151,14 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
        if (!sb_rdonly(sdp->sd_vfs)) {
                bool locked = mutex_trylock(&sdp->sd_freeze_mutex);
 
-               gfs2_make_fs_ro(sdp);
+               wake_up(&sdp->sd_logd_waitq);
+               wake_up(&sdp->sd_quota_wait);
+
+               wait_event_timeout(sdp->sd_log_waitq,
+                                  gfs2_log_is_empty(sdp),
+                                  HZ * 5);
+
+               sdp->sd_vfs->s_flags |= SB_RDONLY;
 
                if (locked)
                        mutex_unlock(&sdp->sd_freeze_mutex);
@@ -315,19 +323,19 @@ int gfs2_withdraw(struct gfs2_sbd *sdp)
        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
        const struct lm_lockops *lm = ls->ls_ops;
 
-       if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
-           test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
-               if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
-                       return -1;
-
-               wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
-                           TASK_UNINTERRUPTIBLE);
-               return -1;
-       }
-
-       set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
-
        if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
+               unsigned long old = READ_ONCE(sdp->sd_flags), new;
+
+               do {
+                       if (old & BIT(SDF_WITHDRAWN)) {
+                               wait_on_bit(&sdp->sd_flags,
+                                           SDF_WITHDRAW_IN_PROG,
+                                           TASK_UNINTERRUPTIBLE);
+                               return -1;
+                       }
+                       new = old | BIT(SDF_WITHDRAWN) | BIT(SDF_WITHDRAW_IN_PROG);
+               } while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new)));
+
                fs_err(sdp, "about to withdraw this file system\n");
                BUG_ON(sdp->sd_args.ar_debug);
 
index 5fffdde..cfec5e0 100644 (file)
@@ -571,12 +571,8 @@ static void init_once(void *foo)
 /*
  * Noinline to reduce binary size.
  */
-static noinline void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
+static noinline void ntfs3_put_sbi(struct ntfs_sb_info *sbi)
 {
-       kfree(sbi->new_rec);
-       kvfree(ntfs_put_shared(sbi->upcase));
-       kfree(sbi->def_table);
-
        wnd_close(&sbi->mft.bitmap);
        wnd_close(&sbi->used.bitmap);
 
@@ -601,6 +597,13 @@ static noinline void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
        indx_clear(&sbi->security.index_sdh);
        indx_clear(&sbi->reparse.index_r);
        indx_clear(&sbi->objid.index_o);
+}
+
+static void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
+{
+       kfree(sbi->new_rec);
+       kvfree(ntfs_put_shared(sbi->upcase));
+       kfree(sbi->def_table);
        kfree(sbi->compress.lznt);
 #ifdef CONFIG_NTFS3_LZX_XPRESS
        xpress_free_decompressor(sbi->compress.xpress);
@@ -625,6 +628,7 @@ static void ntfs_put_super(struct super_block *sb)
 
        /* Mark rw ntfs as clear, if possible. */
        ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
+       ntfs3_put_sbi(sbi);
 }
 
 static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1644,8 +1648,10 @@ static void ntfs_fs_free(struct fs_context *fc)
        struct ntfs_mount_options *opts = fc->fs_private;
        struct ntfs_sb_info *sbi = fc->s_fs_info;
 
-       if (sbi)
+       if (sbi) {
+               ntfs3_put_sbi(sbi);
                ntfs3_free_sbi(sbi);
+       }
 
        if (opts)
                put_mount_options(opts);
index 0f2aa36..3dd5be9 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/hugetlb.h>
 #include <linux/huge_mm.h>
 #include <linux/mount.h>
+#include <linux/ksm.h>
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
@@ -396,6 +397,7 @@ struct mem_size_stats {
        unsigned long swap;
        unsigned long shared_hugetlb;
        unsigned long private_hugetlb;
+       unsigned long ksm;
        u64 pss;
        u64 pss_anon;
        u64 pss_file;
@@ -452,6 +454,9 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
                        mss->lazyfree += size;
        }
 
+       if (PageKsm(page))
+               mss->ksm += size;
+
        mss->resident += size;
        /* Accumulate the size in pages that have been accessed. */
        if (young || page_is_young(page) || PageReferenced(page))
@@ -825,6 +830,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
        SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
        SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
        SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
+       SEQ_PUT_DEC(" kB\nKSM:            ", mss->ksm);
        SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
        SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
        SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
index 136711a..6822ac7 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -311,6 +311,23 @@ int vfs_fstatat(int dfd, const char __user *filename,
        int statx_flags = flags | AT_NO_AUTOMOUNT;
        struct filename *name;
 
+       /*
+        * Work around glibc turning fstat() into fstatat(AT_EMPTY_PATH)
+        *
+        * If AT_EMPTY_PATH is set, we expect the common case to be that
+        * empty path, and avoid doing all the extra pathname work.
+        */
+       if (dfd >= 0 && flags == AT_EMPTY_PATH) {
+               char c;
+
+               ret = get_user(c, filename);
+               if (unlikely(ret))
+                       return ret;
+
+               if (likely(!c))
+                       return vfs_fstat(dfd, stat);
+       }
+
        name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL);
        ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS);
        putname(name);
diff --git a/include/asm-generic/ide_iops.h b/include/asm-generic/ide_iops.h
deleted file mode 100644 (file)
index 81dfa3e..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Generic I/O and MEMIO string operations.  */
-
-#define __ide_insw     insw
-#define __ide_insl     insl
-#define __ide_outsw    outsw
-#define __ide_outsl    outsl
-
-static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               *(u16 *)addr = readw(port);
-               addr += 2;
-       }
-}
-
-static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               *(u32 *)addr = readl(port);
-               addr += 4;
-       }
-}
-
-static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               writew(*(u16 *)addr, port);
-               addr += 2;
-       }
-}
-
-static __inline__ void __ide_mm_outsl(void __iomem * port, void *addr, u32 count)
-{
-       while (count--) {
-               writel(*(u32 *)addr, port);
-               addr += 4;
-       }
-}
index 847da6f..31029f4 100644 (file)
@@ -12,7 +12,7 @@
 
 #define ARMV8_PMU_CYCLE_IDX            (ARMV8_PMU_MAX_COUNTERS - 1)
 
-#ifdef CONFIG_HW_PERF_EVENTS
+#if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM)
 
 struct kvm_pmc {
        u8 idx; /* index into the pmu->pmc array */
@@ -74,6 +74,7 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
 struct kvm_pmu_events *kvm_get_pmu_events(void);
 void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
 void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
+void kvm_vcpu_pmu_resync_el0(void);
 
 #define kvm_vcpu_has_pmu(vcpu)                                 \
        (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
@@ -171,6 +172,7 @@ static inline u8 kvm_arm_pmu_get_pmuver_limit(void)
 {
        return 0;
 }
+static inline void kvm_vcpu_pmu_resync_el0(void) {}
 
 #endif
 
index 6a3a9e1..51b1b70 100644 (file)
@@ -117,6 +117,8 @@ enum audit_nfcfgop {
        AUDIT_NFT_OP_OBJ_RESET,
        AUDIT_NFT_OP_FLOWTABLE_REGISTER,
        AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+       AUDIT_NFT_OP_SETELEM_RESET,
+       AUDIT_NFT_OP_RULE_RESET,
        AUDIT_NFT_OP_INVALID,
 };
 
index 12596af..024e8b2 100644 (file)
@@ -438,7 +438,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
 
        size /= sizeof(long);
        while (size--)
-               *ldst++ = *lsrc++;
+               data_race(*ldst++ = *lsrc++);
 }
 
 /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
index 49586ff..5f2301e 100644 (file)
@@ -359,14 +359,19 @@ enum {
 
 extern const char *ceph_mds_op_name(int op);
 
-
-#define CEPH_SETATTR_MODE   1
-#define CEPH_SETATTR_UID    2
-#define CEPH_SETATTR_GID    4
-#define CEPH_SETATTR_MTIME  8
-#define CEPH_SETATTR_ATIME 16
-#define CEPH_SETATTR_SIZE  32
-#define CEPH_SETATTR_CTIME 64
+#define CEPH_SETATTR_MODE              (1 << 0)
+#define CEPH_SETATTR_UID               (1 << 1)
+#define CEPH_SETATTR_GID               (1 << 2)
+#define CEPH_SETATTR_MTIME             (1 << 3)
+#define CEPH_SETATTR_ATIME             (1 << 4)
+#define CEPH_SETATTR_SIZE              (1 << 5)
+#define CEPH_SETATTR_CTIME             (1 << 6)
+#define CEPH_SETATTR_MTIME_NOW         (1 << 7)
+#define CEPH_SETATTR_ATIME_NOW         (1 << 8)
+#define CEPH_SETATTR_BTIME             (1 << 9)
+#define CEPH_SETATTR_KILL_SGUID        (1 << 10)
+#define CEPH_SETATTR_FSCRYPT_AUTH      (1 << 11)
+#define CEPH_SETATTR_FSCRYPT_FILE      (1 << 12)
 
 /*
  * Ceph setxattr request flags.
@@ -462,24 +467,26 @@ union ceph_mds_request_args {
 } __attribute__ ((packed));
 
 union ceph_mds_request_args_ext {
-       union ceph_mds_request_args old;
-       struct {
-               __le32 mode;
-               __le32 uid;
-               __le32 gid;
-               struct ceph_timespec mtime;
-               struct ceph_timespec atime;
-               __le64 size, old_size;       /* old_size needed by truncate */
-               __le32 mask;                 /* CEPH_SETATTR_* */
-               struct ceph_timespec btime;
-       } __attribute__ ((packed)) setattr_ext;
+       union {
+               union ceph_mds_request_args old;
+               struct {
+                       __le32 mode;
+                       __le32 uid;
+                       __le32 gid;
+                       struct ceph_timespec mtime;
+                       struct ceph_timespec atime;
+                       __le64 size, old_size;       /* old_size needed by truncate */
+                       __le32 mask;                 /* CEPH_SETATTR_* */
+                       struct ceph_timespec btime;
+               } __attribute__ ((packed)) setattr_ext;
+       };
 };
 
 #define CEPH_MDS_FLAG_REPLAY           1 /* this is a replayed op */
 #define CEPH_MDS_FLAG_WANT_DENTRY      2 /* want dentry in reply */
 #define CEPH_MDS_FLAG_ASYNC            4 /* request is asynchronous */
 
-struct ceph_mds_request_head_old {
+struct ceph_mds_request_head_legacy {
        __le64 oldest_client_tid;
        __le32 mdsmap_epoch;           /* on client */
        __le32 flags;                  /* CEPH_MDS_FLAG_* */
@@ -492,9 +499,9 @@ struct ceph_mds_request_head_old {
        union ceph_mds_request_args args;
 } __attribute__ ((packed));
 
-#define CEPH_MDS_REQUEST_HEAD_VERSION  1
+#define CEPH_MDS_REQUEST_HEAD_VERSION  2
 
-struct ceph_mds_request_head {
+struct ceph_mds_request_head_old {
        __le16 version;                /* struct version */
        __le64 oldest_client_tid;
        __le32 mdsmap_epoch;           /* on client */
@@ -508,6 +515,23 @@ struct ceph_mds_request_head {
        union ceph_mds_request_args_ext args;
 } __attribute__ ((packed));
 
+struct ceph_mds_request_head {
+       __le16 version;                /* struct version */
+       __le64 oldest_client_tid;
+       __le32 mdsmap_epoch;           /* on client */
+       __le32 flags;                  /* CEPH_MDS_FLAG_* */
+       __u8 num_retry, num_fwd;       /* legacy count retry and fwd attempts */
+       __le16 num_releases;           /* # include cap/lease release records */
+       __le32 op;                     /* mds op code */
+       __le32 caller_uid, caller_gid;
+       __le64 ino;                    /* use this ino for openc, mkdir, mknod,
+                                         etc. (if replaying) */
+       union ceph_mds_request_args_ext args;
+
+       __le32 ext_num_retry;          /* new count retry attempts */
+       __le32 ext_num_fwd;            /* new count fwd attempts */
+} __attribute__ ((packed));
+
 /* cap/lease release record */
 struct ceph_mds_request_release {
        __le64 ino, cap_id;            /* ino and unique cap id */
index 99c1726..2eaaabb 100644 (file)
@@ -17,6 +17,7 @@
 
 struct ceph_msg;
 struct ceph_connection;
+struct ceph_msg_data_cursor;
 
 /*
  * Ceph defines these callbacks for handling connection events.
@@ -70,6 +71,30 @@ struct ceph_connection_operations {
                                      int used_proto, int result,
                                      const int *allowed_protos, int proto_cnt,
                                      const int *allowed_modes, int mode_cnt);
+
+       /**
+        * sparse_read: read sparse data
+        * @con: connection we're reading from
+        * @cursor: data cursor for reading extents
+        * @buf: optional buffer to read into
+        *
+        * This should be called more than once, each time setting up to
+        * receive an extent into the current cursor position, and zeroing
+        * the holes between them.
+        *
+        * Returns amount of data to be read (in bytes), 0 if reading is
+        * complete, or -errno if there was an error.
+        *
+        * If @buf is set on a >0 return, then the data should be read into
+        * the provided buffer. Otherwise, it should be read into the cursor.
+        *
+        * The sparse read operation is expected to initialize the cursor
+        * with a length covering up to the end of the last extent.
+        */
+       int (*sparse_read)(struct ceph_connection *con,
+                          struct ceph_msg_data_cursor *cursor,
+                          char **buf);
+
 };
 
 /* use format string %s%lld */
@@ -98,6 +123,7 @@ enum ceph_msg_data_type {
        CEPH_MSG_DATA_BIO,      /* data source/destination is a bio list */
 #endif /* CONFIG_BLOCK */
        CEPH_MSG_DATA_BVECS,    /* data source/destination is a bio_vec array */
+       CEPH_MSG_DATA_ITER,     /* data source/destination is an iov_iter */
 };
 
 #ifdef CONFIG_BLOCK
@@ -199,6 +225,7 @@ struct ceph_msg_data {
                        bool            own_pages;
                };
                struct ceph_pagelist    *pagelist;
+               struct iov_iter         iter;
        };
 };
 
@@ -207,6 +234,7 @@ struct ceph_msg_data_cursor {
 
        struct ceph_msg_data    *data;          /* current data item */
        size_t                  resid;          /* bytes not yet consumed */
+       int                     sr_resid;       /* residual sparse_read len */
        bool                    need_crc;       /* crc update needed */
        union {
 #ifdef CONFIG_BLOCK
@@ -222,6 +250,10 @@ struct ceph_msg_data_cursor {
                        struct page     *page;          /* page from list */
                        size_t          offset;         /* bytes from list */
                };
+               struct {
+                       struct iov_iter         iov_iter;
+                       unsigned int            lastlen;
+               };
        };
 };
 
@@ -251,6 +283,7 @@ struct ceph_msg {
        struct kref kref;
        bool more_to_follow;
        bool needs_out_seq;
+       bool sparse_read;
        int front_alloc_len;
 
        struct ceph_msgpool *pool;
@@ -309,6 +342,10 @@ struct ceph_connection_v1_info {
 
        int in_base_pos;     /* bytes read */
 
+       /* sparse reads */
+       struct kvec in_sr_kvec; /* current location to receive into */
+       u64 in_sr_len;          /* amount of data in this extent */
+
        /* message in temps */
        u8 in_tag;           /* protocol control byte */
        struct ceph_msg_header in_hdr;
@@ -395,6 +432,7 @@ struct ceph_connection_v2_info {
 
        void *conn_bufs[16];
        int conn_buf_cnt;
+       int data_len_remain;
 
        struct kvec in_sign_kvecs[8];
        struct kvec out_sign_kvecs[8];
@@ -573,6 +611,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 #endif /* CONFIG_BLOCK */
 void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
                             struct ceph_bvec_iter *bvec_pos);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+                           struct iov_iter *iter);
 
 struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
                               gfp_t flags, bool can_fail);
index fb6be72..bf98239 100644 (file)
@@ -29,14 +29,62 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
 
 #define CEPH_HOMELESS_OSD      -1
 
-/* a given osd we're communicating with */
+/*
+ * A single extent in a SPARSE_READ reply.
+ *
+ * Note that these come from the OSD as little-endian values. On BE arches,
+ * we convert them in-place after receipt.
+ */
+struct ceph_sparse_extent {
+       u64     off;
+       u64     len;
+} __packed;
+
+/* Sparse read state machine state values */
+enum ceph_sparse_read_state {
+       CEPH_SPARSE_READ_HDR    = 0,
+       CEPH_SPARSE_READ_EXTENTS,
+       CEPH_SPARSE_READ_DATA_LEN,
+       CEPH_SPARSE_READ_DATA,
+};
+
+/*
+ * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of
+ * 64-bit offset/length pairs, and then all of the actual file data
+ * concatenated after it (sans holes).
+ *
+ * Unfortunately, we don't know how long the extent array is until we've
+ * started reading the data section of the reply. The caller should send down
+ * a destination buffer for the array, but we'll alloc one if it's too small
+ * or if the caller doesn't.
+ */
+struct ceph_sparse_read {
+       enum ceph_sparse_read_state     sr_state;    /* state machine state */
+       u64                             sr_req_off;  /* orig request offset */
+       u64                             sr_req_len;  /* orig request length */
+       u64                             sr_pos;      /* current pos in buffer */
+       int                             sr_index;    /* current extent index */
+       __le32                          sr_datalen;  /* length of actual data */
+       u32                             sr_count;    /* extent count in reply */
+       int                             sr_ext_len;  /* length of extent array */
+       struct ceph_sparse_extent       *sr_extent;  /* extent array */
+};
+
+/*
+ * A given osd we're communicating with.
+ *
+ * Note that the o_requests tree can be searched while holding the "lock" mutex
+ * or the "o_requests_lock" spinlock. Insertion or removal requires both!
+ */
 struct ceph_osd {
        refcount_t o_ref;
+       int o_sparse_op_idx;
        struct ceph_osd_client *o_osdc;
        int o_osd;
        int o_incarnation;
        struct rb_node o_node;
        struct ceph_connection o_con;
+       spinlock_t o_requests_lock;
        struct rb_root o_requests;
        struct rb_root o_linger_requests;
        struct rb_root o_backoff_mappings;
@@ -46,6 +94,7 @@ struct ceph_osd {
        unsigned long lru_ttl;
        struct list_head o_keepalive_item;
        struct mutex lock;
+       struct ceph_sparse_read o_sparse_read;
 };
 
 #define CEPH_OSD_SLAB_OPS      2
@@ -59,6 +108,7 @@ enum ceph_osd_data_type {
        CEPH_OSD_DATA_TYPE_BIO,
 #endif /* CONFIG_BLOCK */
        CEPH_OSD_DATA_TYPE_BVECS,
+       CEPH_OSD_DATA_TYPE_ITER,
 };
 
 struct ceph_osd_data {
@@ -82,6 +132,7 @@ struct ceph_osd_data {
                        struct ceph_bvec_iter   bvec_pos;
                        u32                     num_bvecs;
                };
+               struct iov_iter         iter;
        };
 };
 
@@ -98,6 +149,8 @@ struct ceph_osd_req_op {
                        u64 offset, length;
                        u64 truncate_size;
                        u32 truncate_seq;
+                       int sparse_ext_cnt;
+                       struct ceph_sparse_extent *sparse_ext;
                        struct ceph_osd_data osd_data;
                } extent;
                struct {
@@ -145,6 +198,9 @@ struct ceph_osd_req_op {
                        u32 src_fadvise_flags;
                        struct ceph_osd_data osd_data;
                } copy_from;
+               struct {
+                       u64 ver;
+               } assert_ver;
        };
 };
 
@@ -199,6 +255,7 @@ struct ceph_osd_request {
        struct ceph_osd_client *r_osdc;
        struct kref       r_kref;
        bool              r_mempool;
+       bool              r_linger;           /* don't resend on failure */
        struct completion r_completion;       /* private to osd_client.c */
        ceph_osdc_callback_t r_callback;
 
@@ -211,9 +268,9 @@ struct ceph_osd_request {
        struct ceph_snap_context *r_snapc;    /* for writes */
        struct timespec64 r_mtime;            /* ditto */
        u64 r_data_offset;                    /* ditto */
-       bool r_linger;                        /* don't resend on failure */
 
        /* internal */
+       u64 r_version;                        /* data version sent in reply */
        unsigned long r_stamp;                /* jiffies, send or check time */
        unsigned long r_start_stamp;          /* jiffies */
        ktime_t r_start_latency;              /* ktime_t */
@@ -450,6 +507,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
 void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
                                         unsigned int which,
                                         struct ceph_bvec_iter *bvec_pos);
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+                               unsigned int which, struct iov_iter *iter);
 
 extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
                                        unsigned int which,
@@ -504,6 +563,20 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
                                      u32 truncate_seq, u64 truncate_size,
                                      bool use_mempool);
 
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
+
+/*
+ * How big an extent array should we preallocate for a sparse read? This is
+ * just a starting value.  If we get more than this back from the OSD, the
+ * receiver will reallocate.
+ */
+#define CEPH_SPARSE_EXT_ARRAY_INITIAL  16
+
+static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
+{
+       return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
+}
+
 extern void ceph_osdc_get_request(struct ceph_osd_request *req);
 extern void ceph_osdc_put_request(struct ceph_osd_request *req);
 
@@ -558,5 +631,19 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
                            struct ceph_object_locator *oloc,
                            struct ceph_watch_item **watchers,
                            u32 *num_watchers);
-#endif
 
+/* Find offset into the buffer of the end of the extent map */
+static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
+{
+       struct ceph_sparse_extent *ext;
+
+       /* No extents? No data */
+       if (op->extent.sparse_ext_cnt == 0)
+               return 0;
+
+       ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
+
+       return ext->off + ext->len - op->extent.offset;
+}
+
+#endif
index 43a7a15..73c3efb 100644 (file)
@@ -524,6 +524,10 @@ struct ceph_osd_op {
                        __le64 cookie;
                } __attribute__ ((packed)) notify;
                struct {
+                       __le64 unused;
+                       __le64 ver;
+               } __attribute__ ((packed)) assert_ver;
+               struct {
                        __le64 offset, length;
                        __le64 src_offset;
                } __attribute__ ((packed)) clonerange;
index 43b363a..71d186d 100644 (file)
@@ -141,6 +141,9 @@ struct cpufreq_policy {
         */
        bool                    dvfs_possible_from_any_cpu;
 
+       /* Per policy boost enabled flag. */
+       bool                    boost_enabled;
+
         /* Cached frequency lookup from cpufreq_driver_resolve_freq. */
        unsigned int cached_target_freq;
        unsigned int cached_resolved_idx;
index beed838..9911508 100644 (file)
@@ -50,7 +50,7 @@ extern struct module __this_module;
                __EXPORT_SYMBOL_REF(sym)        ASM_NL  \
        .previous
 
-#if !defined(CONFIG_MODULES) || defined(__DISABLE_EXPORTS)
+#if defined(__DISABLE_EXPORTS)
 
 /*
  * Allow symbol exports to be disabled completely so that C code may
@@ -75,7 +75,7 @@ extern struct module __this_module;
        __ADDRESSABLE(sym)                                      \
        asm(__stringify(___EXPORT_SYMBOL(sym, license, ns)))
 
-#endif /* CONFIG_MODULES */
+#endif
 
 #ifdef DEFAULT_SYMBOL_NAMESPACE
 #define _EXPORT_SYMBOL(sym, license)   __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE))
index 0a221e7..07e3701 100644 (file)
@@ -63,7 +63,7 @@ struct gameport_driver {
 int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mode);
 void gameport_close(struct gameport *gameport);
 
-#if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE))
+#if IS_REACHABLE(CONFIG_GAMEPORT)
 
 void __gameport_register_port(struct gameport *gameport, struct module *owner);
 /* use a define to avoid include chaining to get THIS_MODULE */
index 5883551..af8a771 100644 (file)
@@ -147,6 +147,7 @@ struct inet6_skb_parm {
 #define IP6SKB_JUMBOGRAM      128
 #define IP6SKB_SEG6          256
 #define IP6SKB_FAKEJUMBO      512
+#define IP6SKB_MULTIPATH      1024
 };
 
 #if defined(CONFIG_NET_L3_MASTER_DEV)
index 819b6bc..3df5499 100644 (file)
@@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
 int kasan_populate_early_shadow(const void *shadow_start,
                                const void *shadow_end);
 
+#ifndef __HAVE_ARCH_SHADOW_MAP
 static inline void *kasan_mem_to_shadow(const void *addr)
 {
        return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
                + KASAN_SHADOW_OFFSET;
 }
+#endif
 
 int kasan_add_zero_shadow(void *start, unsigned long size);
 void kasan_remove_zero_shadow(void *start, unsigned long size);
index 9d3ac77..fb6c610 100644 (file)
@@ -190,8 +190,6 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
                                      struct kvm_vcpu *except);
-bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
-                               unsigned long *vcpu_bitmap);
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID            0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID       1
@@ -256,11 +254,15 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+union kvm_mmu_notifier_arg {
+       pte_t pte;
+};
+
 struct kvm_gfn_range {
        struct kvm_memory_slot *slot;
        gfn_t start;
        gfn_t end;
-       pte_t pte;
+       union kvm_mmu_notifier_arg arg;
        bool may_block;
 };
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
@@ -865,6 +867,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
        unlikely(__ret);                                        \
 })
 
+/*
+ * Note, "data corruption" refers to corruption of host kernel data structures,
+ * not guest data.  Guest data corruption, suspected or confirmed, that is tied
+ * and contained to a single VM should *never* BUG() and potentially panic the
+ * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure
+ * is corrupted and that corruption can have a cascading effect to other parts
+ * of the hosts and/or to other VMs.
+ */
+#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm)                  \
+({                                                             \
+       bool __ret = !!(cond);                                  \
+                                                               \
+       if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION))          \
+               BUG_ON(__ret);                                  \
+       else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged))      \
+               kvm_vm_bugged(kvm);                             \
+       unlikely(__ret);                                        \
+})
+
 static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_PROVE_RCU
@@ -1359,6 +1380,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+                                  const struct kvm_memory_slot *memslot);
 
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
@@ -1387,10 +1411,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                                        unsigned long mask);
 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
 
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot);
-#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
+#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
 int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
                      int *is_dirty, struct kvm_memory_slot **memslot);
@@ -1479,11 +1500,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        return -ENOTSUPP;
 }
+#else
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
+#endif
+
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+                                                   gfn_t gfn, u64 nr_pages)
+{
+       return -EOPNOTSUPP;
+}
+#else
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
 #endif
 
 #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
@@ -2148,8 +2181,6 @@ struct kvm_device_ops {
        int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
 };
 
-void kvm_device_get(struct kvm_device *dev);
-void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type);
 void kvm_unregister_device_ops(u32 type);
index 820f7a3..52d58b1 100644 (file)
@@ -344,7 +344,6 @@ enum {
        ATA_LINK_RESUME_TRIES   = 5,
 
        /* how hard are we gonna try to probe/recover devices */
-       ATA_PROBE_MAX_TRIES     = 3,
        ATA_EH_DEV_TRIES        = 3,
        ATA_EH_PMP_TRIES        = 5,
        ATA_EH_PMP_LINK_TRIES   = 3,
@@ -977,12 +976,6 @@ struct ata_port_operations {
                                        ssize_t size);
 
        /*
-        * Obsolete
-        */
-       void (*phy_reset)(struct ata_port *ap);
-       void (*eng_timeout)(struct ata_port *ap);
-
-       /*
         * ->inherits must be the last field and all the preceding
         * fields must be pointers.
         */
@@ -1116,7 +1109,7 @@ static inline void ata_sas_port_resume(struct ata_port *ap)
 extern int ata_ratelimit(void);
 extern void ata_msleep(struct ata_port *ap, unsigned int msecs);
 extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask,
-                       u32 val, unsigned long interval, unsigned long timeout);
+                            u32 val, unsigned int interval, unsigned int timeout);
 extern int atapi_cmd_type(u8 opcode);
 extern unsigned int ata_pack_xfermask(unsigned int pio_mask,
                                      unsigned int mwdma_mask,
@@ -1166,11 +1159,11 @@ extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *
  * SATA specific code - drivers/ata/libata-sata.c
  */
 #ifdef CONFIG_SATA_HOST
-extern const unsigned long sata_deb_timing_normal[];
-extern const unsigned long sata_deb_timing_hotplug[];
-extern const unsigned long sata_deb_timing_long[];
+extern const unsigned int sata_deb_timing_normal[];
+extern const unsigned int sata_deb_timing_hotplug[];
+extern const unsigned int sata_deb_timing_long[];
 
-static inline const unsigned long *
+static inline const unsigned int *
 sata_ehc_deb_timing(struct ata_eh_context *ehc)
 {
        if (ehc->i.flags & ATA_EHI_HOTPLUGGED)
@@ -1185,14 +1178,14 @@ extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
 extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
 extern int sata_set_spd(struct ata_link *link);
 extern int sata_link_hardreset(struct ata_link *link,
-                       const unsigned long *timing, unsigned long deadline,
+                       const unsigned int *timing, unsigned long deadline,
                        bool *online, int (*check_ready)(struct ata_link *));
-extern int sata_link_resume(struct ata_link *link, const unsigned long *params,
+extern int sata_link_resume(struct ata_link *link, const unsigned int *params,
                            unsigned long deadline);
 extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link);
 extern void ata_eh_analyze_ncq_error(struct ata_link *link);
 #else
-static inline const unsigned long *
+static inline const unsigned int *
 sata_ehc_deb_timing(struct ata_eh_context *ehc)
 {
        return NULL;
@@ -1212,7 +1205,7 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
 }
 static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; }
 static inline int sata_link_hardreset(struct ata_link *link,
-                                     const unsigned long *timing,
+                                     const unsigned int *timing,
                                      unsigned long deadline,
                                      bool *online,
                                      int (*check_ready)(struct ata_link *))
@@ -1222,7 +1215,7 @@ static inline int sata_link_hardreset(struct ata_link *link,
        return -EOPNOTSUPP;
 }
 static inline int sata_link_resume(struct ata_link *link,
-                                  const unsigned long *params,
+                                  const unsigned int *params,
                                   unsigned long deadline)
 {
        return -EOPNOTSUPP;
@@ -1234,20 +1227,15 @@ static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link)
 static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { }
 #endif
 extern int sata_link_debounce(struct ata_link *link,
-                       const unsigned long *params, unsigned long deadline);
+                             const unsigned int *params, unsigned long deadline);
 extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
                             bool spm_wakeup);
 extern int ata_slave_link_init(struct ata_port *ap);
-extern void ata_sas_port_destroy(struct ata_port *);
 extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
                                           struct ata_port_info *, struct Scsi_Host *);
-extern void ata_sas_async_probe(struct ata_port *ap);
-extern int ata_sas_sync_probe(struct ata_port *ap);
-extern int ata_sas_port_init(struct ata_port *);
-extern int ata_sas_port_start(struct ata_port *ap);
+extern void ata_port_probe(struct ata_port *ap);
 extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
 extern void ata_sas_tport_delete(struct ata_port *ap);
-extern void ata_sas_port_stop(struct ata_port *ap);
 extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
 extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
 extern void ata_tf_to_fis(const struct ata_taskfile *tf,
@@ -1785,7 +1773,7 @@ static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap,
 {
        struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
 
-       if (unlikely(!qc) || !ap->ops->error_handler)
+       if (unlikely(!qc))
                return qc;
 
        if ((qc->flags & (ATA_QCFLAG_ACTIVE |
@@ -1876,7 +1864,7 @@ static inline int ata_check_ready(u8 status)
 }
 
 static inline unsigned long ata_deadline(unsigned long from_jiffies,
-                                        unsigned long timeout_msecs)
+                                        unsigned int timeout_msecs)
 {
        return from_jiffies + msecs_to_jiffies(timeout_msecs);
 }
index 8bef1ab..4e27ca7 100644 (file)
 #define        PHY_ID_KSZ9477          0x00221631
 
 /* struct phy_device dev_flags definitions */
-#define MICREL_PHY_50MHZ_CLK   0x00000001
-#define MICREL_PHY_FXEN                0x00000002
-#define MICREL_KSZ8_P1_ERRATA  0x00000003
+#define MICREL_PHY_50MHZ_CLK   BIT(0)
+#define MICREL_PHY_FXEN                BIT(1)
+#define MICREL_KSZ8_P1_ERRATA  BIT(2)
+#define MICREL_NO_EEE          BIT(3)
 
 #define MICREL_KSZ9021_EXTREG_CTRL     0xB
 #define MICREL_KSZ9021_EXTREG_DATA_WRITE       0xC
index ed67981..6a9ddf2 100644 (file)
@@ -1676,8 +1676,8 @@ int of_overlay_notifier_unregister(struct notifier_block *nb);
 
 #else
 
-static inline int of_overlay_fdt_apply(void *overlay_fdt, u32 overlay_fdt_size,
-                                      int *ovcs_id)
+static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
+                                      int *ovcs_id, struct device_node *target_base)
 {
        return -ENOTSUPP;
 }
index 7d07f87..2b886ea 100644 (file)
@@ -600,7 +600,7 @@ void pcs_get_state(struct phylink_pcs *pcs,
  *
  * The %neg_mode argument should be tested via the phylink_mode_*() family of
  * functions, or for PCS that set pcs->neg_mode true, should be tested
- * against the %PHYLINK_PCS_NEG_* definitions.
+ * against the PHYLINK_PCS_NEG_* definitions.
  */
 int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
               phy_interface_t interface, const unsigned long *advertising,
@@ -630,7 +630,7 @@ void pcs_an_restart(struct phylink_pcs *pcs);
  *
  * The %mode argument should be tested via the phylink_mode_*() family of
  * functions, or for PCS that set pcs->neg_mode true, should be tested
- * against the %PHYLINK_PCS_NEG_* definitions.
+ * against the PHYLINK_PCS_NEG_* definitions.
  */
 void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
                 phy_interface_t interface, int speed, int duplex);
diff --git a/include/linux/platform_data/rtc-ds2404.h b/include/linux/platform_data/rtc-ds2404.h
deleted file mode 100644 (file)
index 22c5382..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * ds2404.h - platform data structure for the DS2404 RTC.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 Sven Schnelle <svens@stackframe.org>
- */
-
-#ifndef __LINUX_DS2404_H
-#define __LINUX_DS2404_H
-
-struct ds2404_platform_data {
-
-       unsigned int gpio_rst;
-       unsigned int gpio_clk;
-       unsigned int gpio_dq;
-};
-#endif
index 04ae1d9..d2f9f69 100644 (file)
@@ -298,7 +298,7 @@ struct pwm_chip {
        int base;
        unsigned int npwm;
 
-       struct pwm_device * (*of_xlate)(struct pwm_chip *pc,
+       struct pwm_device * (*of_xlate)(struct pwm_chip *chip,
                                        const struct of_phandle_args *args);
        unsigned int of_pwm_n_cells;
 
@@ -395,9 +395,9 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
                                         unsigned int index,
                                         const char *label);
 
-struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
+struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip,
                const struct of_phandle_args *args);
-struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc,
+struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip,
                                       const struct of_phandle_args *args);
 
 struct pwm_device *pwm_get(struct device *dev, const char *con_id);
index f29aaaf..006e18d 100644 (file)
@@ -108,6 +108,8 @@ extern const struct raid6_calls raid6_vpermxor1;
 extern const struct raid6_calls raid6_vpermxor2;
 extern const struct raid6_calls raid6_vpermxor4;
 extern const struct raid6_calls raid6_vpermxor8;
+extern const struct raid6_calls raid6_lsx;
+extern const struct raid6_calls raid6_lasx;
 
 struct raid6_recov_calls {
        void (*data2)(int, size_t, int, int, void **);
@@ -123,6 +125,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2;
 extern const struct raid6_recov_calls raid6_recov_avx512;
 extern const struct raid6_recov_calls raid6_recov_s390xc;
 extern const struct raid6_recov_calls raid6_recov_neon;
+extern const struct raid6_recov_calls raid6_recov_lsx;
+extern const struct raid6_recov_calls raid6_recov_lasx;
 
 extern const struct raid6_calls raid6_neonx1;
 extern const struct raid6_calls raid6_neonx2;
index a3825ce..51cc21e 100644 (file)
@@ -479,7 +479,6 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
 
 #define anon_vma_init()                do {} while (0)
 #define anon_vma_prepare(vma)  (0)
-#define anon_vma_link(vma)     do {} while (0)
 
 static inline int folio_referenced(struct folio *folio, int is_locked,
                                  struct mem_cgroup *memcg,
index 1fd9c6a..4c0bcbe 100644 (file)
@@ -146,6 +146,7 @@ struct rtc_device {
 
        time64_t range_min;
        timeu64_t range_max;
+       timeu64_t alarm_offset_max;
        time64_t start_secs;
        time64_t offset_secs;
        bool set_start_time;
index b0d36a9..5cf6f6f 100644 (file)
@@ -25,7 +25,6 @@ struct tca6416_keys_platform_data {
        unsigned int rep:1;     /* enable input subsystem auto repeat */
        uint16_t pinmask;
        uint16_t invert;
-       int irq_is_gpio;
        int use_polling;        /* use polling if Interrupt is not connected*/
 };
 #endif
index 19adacd..3489a1c 100644 (file)
@@ -57,6 +57,7 @@ struct inet_skb_parm {
 #define IPSKB_FRAG_PMTU                BIT(6)
 #define IPSKB_L3SLAVE          BIT(7)
 #define IPSKB_NOPOLICY         BIT(8)
+#define IPSKB_MULTIPATH                BIT(9)
 
        u16                     frag_max_size;
 };
@@ -94,7 +95,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
        ipcm_init(ipcm);
 
        ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
-       ipcm->sockc.tsflags = inet->sk.sk_tsflags;
+       ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
        ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
        ipcm->addr = inet->inet_saddr;
        ipcm->protocol = inet->inet_num;
index c9ff23c..1ba9f4d 100644 (file)
@@ -642,7 +642,10 @@ static inline bool fib6_rules_early_flow_dissect(struct net *net,
        if (!net->ipv6.fib6_rules_require_fldissect)
                return false;
 
-       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       memset(flkeys, 0, sizeof(*flkeys));
+       __skb_flow_dissect(net, skb, &flow_keys_dissector,
+                          flkeys, NULL, 0, 0, 0, flag);
+
        fl6->fl6_sport = flkeys->ports.src;
        fl6->fl6_dport = flkeys->ports.dst;
        fl6->flowi6_proto = flkeys->basic.ip_proto;
index a378eff..f0c1386 100644 (file)
@@ -418,7 +418,10 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
        if (!net->ipv4.fib_rules_require_fldissect)
                return false;
 
-       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       memset(flkeys, 0, sizeof(*flkeys));
+       __skb_flow_dissect(net, skb, &flow_keys_dissector,
+                          flkeys, NULL, 0, 0, 0, flag);
+
        fl4->fl4_sport = flkeys->ports.src;
        fl4->fl4_dport = flkeys->ports.dst;
        fl4->flowi4_proto = flkeys->basic.ip_proto;
index e8750b4..f346b4e 100644 (file)
@@ -483,15 +483,14 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
                u64_stats_inc(&tstats->tx_packets);
                u64_stats_update_end(&tstats->syncp);
                put_cpu_ptr(tstats);
+               return;
+       }
+
+       if (pkt_len < 0) {
+               DEV_STATS_INC(dev, tx_errors);
+               DEV_STATS_INC(dev, tx_aborted_errors);
        } else {
-               struct net_device_stats *err_stats = &dev->stats;
-
-               if (pkt_len < 0) {
-                       err_stats->tx_errors++;
-                       err_stats->tx_aborted_errors++;
-               } else {
-                       err_stats->tx_dropped++;
-               }
+               DEV_STATS_INC(dev, tx_dropped);
        }
 }
 
index c5bcdf6..e8c76b4 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/sched/signal.h>
+#include <net/compat.h>
 
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
@@ -123,14 +124,17 @@ static inline bool scm_has_secdata(struct socket *sock)
 static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
 {
        struct file *pidfd_file = NULL;
-       int pidfd;
+       int len, pidfd;
 
-       /*
-        * put_cmsg() doesn't return an error if CMSG is truncated,
+       /* put_cmsg() doesn't return an error if CMSG is truncated,
         * that's why we need to opencode these checks here.
         */
-       if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
-           (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
+       if (msg->msg_flags & MSG_CMSG_COMPAT)
+               len = sizeof(struct compat_cmsghdr) + sizeof(int);
+       else
+               len = sizeof(struct cmsghdr) + sizeof(int);
+
+       if (msg->msg_controllen < len) {
                msg->msg_flags |= MSG_CTRUNC;
                return;
        }
index 11d5034..b770261 100644 (file)
@@ -1053,6 +1053,12 @@ static inline void sk_wmem_queued_add(struct sock *sk, int val)
        WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
 }
 
+static inline void sk_forward_alloc_add(struct sock *sk, int val)
+{
+       /* Paired with lockless reads of sk->sk_forward_alloc */
+       WRITE_ONCE(sk->sk_forward_alloc, sk->sk_forward_alloc + val);
+}
+
 void sk_stream_write_space(struct sock *sk);
 
 /* OOB backlog add */
@@ -1377,7 +1383,7 @@ static inline int sk_forward_alloc_get(const struct sock *sk)
        if (sk->sk_prot->forward_alloc_get)
                return sk->sk_prot->forward_alloc_get(sk);
 #endif
-       return sk->sk_forward_alloc;
+       return READ_ONCE(sk->sk_forward_alloc);
 }
 
 static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
@@ -1673,14 +1679,14 @@ static inline void sk_mem_charge(struct sock *sk, int size)
 {
        if (!sk_has_account(sk))
                return;
-       sk->sk_forward_alloc -= size;
+       sk_forward_alloc_add(sk, -size);
 }
 
 static inline void sk_mem_uncharge(struct sock *sk, int size)
 {
        if (!sk_has_account(sk))
                return;
-       sk->sk_forward_alloc += size;
+       sk_forward_alloc_add(sk, size);
        sk_mem_reclaim(sk);
 }
 
@@ -1900,7 +1906,9 @@ struct sockcm_cookie {
 static inline void sockcm_init(struct sockcm_cookie *sockc,
                               const struct sock *sk)
 {
-       *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
+       *sockc = (struct sockcm_cookie) {
+               .tsflags = READ_ONCE(sk->sk_tsflags)
+       };
 }
 
 int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
@@ -2695,9 +2703,9 @@ void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
 static inline void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 {
-       ktime_t kt = skb->tstamp;
        struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
-
+       u32 tsflags = READ_ONCE(sk->sk_tsflags);
+       ktime_t kt = skb->tstamp;
        /*
         * generate control messages if
         * - receive time stamping in software requested
@@ -2705,10 +2713,10 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
         * - hardware time stamps available and wanted
         */
        if (sock_flag(sk, SOCK_RCVTSTAMP) ||
-           (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
-           (kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
+           (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
+           (kt && tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
            (hwtstamps->hwtstamp &&
-            (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
+            (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
                __sock_recv_timestamp(msg, sk, skb);
        else
                sock_write_timestamp(sk, kt);
@@ -2730,7 +2738,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
 #define TSFLAGS_ANY      (SOF_TIMESTAMPING_SOFTWARE                    | \
                           SOF_TIMESTAMPING_RAW_HARDWARE)
 
-       if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY)
+       if (sk->sk_flags & FLAGS_RECV_CMSGS ||
+           READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
                __sock_recv_cmsgs(msg, sk, skb);
        else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
                sock_write_timestamp(sk, skb->tstamp);
index c9a8bce..d70c55f 100644 (file)
@@ -142,7 +142,7 @@ struct snd_dmaengine_pcm_config {
                        struct snd_pcm_substream *substream);
        int (*process)(struct snd_pcm_substream *substream,
                       int channel, unsigned long hwoff,
-                      struct iov_iter *buf, unsigned long bytes);
+                      unsigned long bytes);
        dma_filter_fn compat_filter_fn;
        struct device *dma_dev;
        const char *chan_names[SNDRV_PCM_STREAM_LAST + 1];
index 17bea31..ceca69b 100644 (file)
@@ -139,7 +139,7 @@ struct snd_soc_component_driver {
                struct snd_pcm_audio_tstamp_report *audio_tstamp_report);
        int (*copy)(struct snd_soc_component *component,
                    struct snd_pcm_substream *substream, int channel,
-                   unsigned long pos, struct iov_iter *buf,
+                   unsigned long pos, struct iov_iter *iter,
                    unsigned long bytes);
        struct page *(*page)(struct snd_soc_component *component,
                             struct snd_pcm_substream *substream,
@@ -511,7 +511,7 @@ int snd_soc_pcm_component_ioctl(struct snd_pcm_substream *substream,
 int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream);
 int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
                               int channel, unsigned long pos,
-                              struct iov_iter *buf, unsigned long bytes);
+                              struct iov_iter *iter, unsigned long bytes);
 struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
                                        unsigned long offset);
 int snd_soc_pcm_component_mmap(struct snd_pcm_substream *substream,
index b3fcab1..db92a72 100644 (file)
  *  - add FUSE_EXT_GROUPS
  *  - add FUSE_CREATE_SUPP_GROUP
  *  - add FUSE_HAS_EXPIRE_ONLY
+ *
+ *  7.39
+ *  - add FUSE_DIRECT_IO_RELAX
+ *  - add FUSE_STATX and related structures
  */
 
 #ifndef _LINUX_FUSE_H
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 38
+#define FUSE_KERNEL_MINOR_VERSION 39
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -269,6 +273,40 @@ struct fuse_attr {
        uint32_t        flags;
 };
 
+/*
+ * The following structures are bit-for-bit compatible with the statx(2) ABI in
+ * Linux.
+ */
+struct fuse_sx_time {
+       int64_t         tv_sec;
+       uint32_t        tv_nsec;
+       int32_t         __reserved;
+};
+
+struct fuse_statx {
+       uint32_t        mask;
+       uint32_t        blksize;
+       uint64_t        attributes;
+       uint32_t        nlink;
+       uint32_t        uid;
+       uint32_t        gid;
+       uint16_t        mode;
+       uint16_t        __spare0[1];
+       uint64_t        ino;
+       uint64_t        size;
+       uint64_t        blocks;
+       uint64_t        attributes_mask;
+       struct fuse_sx_time     atime;
+       struct fuse_sx_time     btime;
+       struct fuse_sx_time     ctime;
+       struct fuse_sx_time     mtime;
+       uint32_t        rdev_major;
+       uint32_t        rdev_minor;
+       uint32_t        dev_major;
+       uint32_t        dev_minor;
+       uint64_t        __spare2[14];
+};
+
 struct fuse_kstatfs {
        uint64_t        blocks;
        uint64_t        bfree;
@@ -371,6 +409,8 @@ struct fuse_file_lock {
  * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
  *                     symlink and mknod (single group that matches parent)
  * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
+ * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now
+ *                       allow shared mmap
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -409,6 +449,7 @@ struct fuse_file_lock {
 #define FUSE_HAS_INODE_DAX     (1ULL << 33)
 #define FUSE_CREATE_SUPP_GROUP (1ULL << 34)
 #define FUSE_HAS_EXPIRE_ONLY   (1ULL << 35)
+#define FUSE_DIRECT_IO_RELAX   (1ULL << 36)
 
 /**
  * CUSE INIT request/reply flags
@@ -575,6 +616,7 @@ enum fuse_opcode {
        FUSE_REMOVEMAPPING      = 49,
        FUSE_SYNCFS             = 50,
        FUSE_TMPFILE            = 51,
+       FUSE_STATX              = 52,
 
        /* CUSE specific operations */
        CUSE_INIT               = 4096,
@@ -639,6 +681,22 @@ struct fuse_attr_out {
        struct fuse_attr attr;
 };
 
+struct fuse_statx_in {
+       uint32_t        getattr_flags;
+       uint32_t        reserved;
+       uint64_t        fh;
+       uint32_t        sx_flags;
+       uint32_t        sx_mask;
+};
+
+struct fuse_statx_out {
+       uint64_t        attr_valid;     /* Cache timeout for the attributes */
+       uint32_t        attr_valid_nsec;
+       uint32_t        flags;
+       uint64_t        spare[2];
+       struct fuse_statx stat;
+};
+
 #define FUSE_COMPAT_MKNOD_IN_SIZE 8
 
 struct fuse_mknod_in {
index 8466c2a..ca30232 100644 (file)
@@ -263,6 +263,7 @@ enum nft_chain_attributes {
  * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
  * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32)
  * @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32)
+ * @NFTA_RULE_CHAIN_ID: add the rule to chain by ID, alternative to @NFTA_RULE_CHAIN (NLA_U32)
  */
 enum nft_rule_attributes {
        NFTA_RULE_UNSPEC,
index b0cb763..21d2fa8 100644 (file)
@@ -143,6 +143,8 @@ static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
        { AUDIT_NFT_OP_OBJ_RESET,               "nft_reset_obj"            },
        { AUDIT_NFT_OP_FLOWTABLE_REGISTER,      "nft_register_flowtable"   },
        { AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,    "nft_unregister_flowtable" },
+       { AUDIT_NFT_OP_SETELEM_RESET,           "nft_reset_setelem"        },
+       { AUDIT_NFT_OP_RULE_RESET,              "nft_reset_rule"           },
        { AUDIT_NFT_OP_INVALID,                 "nft_invalid"              },
 };
 
index b5149cf..146824c 100644 (file)
@@ -553,7 +553,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                         void *value, u64 map_flags, gfp_t gfp_flags)
 {
        struct bpf_local_storage_data *old_sdata = NULL;
-       struct bpf_local_storage_elem *selem = NULL;
+       struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
        struct bpf_local_storage *local_storage;
        unsigned long flags;
        int err;
@@ -607,11 +607,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                }
        }
 
-       if (gfp_flags == GFP_KERNEL) {
-               selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
-               if (!selem)
-                       return ERR_PTR(-ENOMEM);
-       }
+       /* A lookup has just been done before and concluded a new selem is
+        * needed. The chance of an unnecessary alloc is unlikely.
+        */
+       alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+       if (!alloc_selem)
+               return ERR_PTR(-ENOMEM);
 
        raw_spin_lock_irqsave(&local_storage->lock, flags);
 
@@ -623,13 +624,13 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                 * simple.
                 */
                err = -EAGAIN;
-               goto unlock_err;
+               goto unlock;
        }
 
        old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
        err = check_flags(old_sdata, map_flags);
        if (err)
-               goto unlock_err;
+               goto unlock;
 
        if (old_sdata && (map_flags & BPF_F_LOCK)) {
                copy_map_value_locked(&smap->map, old_sdata->data, value,
@@ -638,23 +639,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                goto unlock;
        }
 
-       if (gfp_flags != GFP_KERNEL) {
-               /* local_storage->lock is held.  Hence, we are sure
-                * we can unlink and uncharge the old_sdata successfully
-                * later.  Hence, instead of charging the new selem now
-                * and then uncharge the old selem later (which may cause
-                * a potential but unnecessary charge failure),  avoid taking
-                * a charge at all here (the "!old_sdata" check) and the
-                * old_sdata will not be uncharged later during
-                * bpf_selem_unlink_storage_nolock().
-                */
-               selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
-               if (!selem) {
-                       err = -ENOMEM;
-                       goto unlock_err;
-               }
-       }
-
+       alloc_selem = NULL;
        /* First, link the new selem to the map */
        bpf_selem_link_map(smap, selem);
 
@@ -665,20 +650,16 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
        if (old_sdata) {
                bpf_selem_unlink_map(SELEM(old_sdata));
                bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
-                                               false, false);
+                                               true, false);
        }
 
 unlock:
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
-       return SDATA(selem);
-
-unlock_err:
-       raw_spin_unlock_irqrestore(&local_storage->lock, flags);
-       if (selem) {
+       if (alloc_selem) {
                mem_uncharge(smap, owner, smap->elem_size);
-               bpf_selem_free(selem, smap, true);
+               bpf_selem_free(alloc_selem, smap, true);
        }
-       return ERR_PTR(err);
+       return err ? ERR_PTR(err) : SDATA(selem);
 }
 
 static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
@@ -779,7 +760,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
                 * of the loop will set the free_cgroup_storage to true.
                 */
                free_storage = bpf_selem_unlink_storage_nolock(
-                       local_storage, selem, false, true);
+                       local_storage, selem, true, true);
        }
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 
index ebeb069..eb01c31 100644 (file)
@@ -5502,9 +5502,9 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
                }
 
                run_ctx.bpf_cookie = 0;
-               run_ctx.saved_run_ctx = NULL;
                if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
                        /* recursion detected */
+                       __bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
                        bpf_prog_put(prog);
                        return -EBUSY;
                }
index 78acf28..53ff50c 100644 (file)
@@ -926,13 +926,12 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
        migrate_disable();
        might_fault();
 
+       run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
        if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
                bpf_prog_inc_misses_counter(prog);
                return 0;
        }
-
-       run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
-
        return bpf_prog_start_time();
 }
 
index e8db8d9..4722b99 100644 (file)
@@ -1,3 +1,5 @@
+# Help: Debugging for CI systems and finding regressions
+#
 # The config is based on running daily CI for enterprise Linux distros to
 # seek regressions on linux-next builds on different bare-metal and virtual
 # platforms. It can be used for example,
index 208481d..d087706 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Bootable as a KVM guest
 CONFIG_NET=y
 CONFIG_NET_CORE=y
 CONFIG_NETDEVICES=y
index 81ff078..ebfdc3d 100644 (file)
@@ -1,3 +1,5 @@
+# Help: Disable Power Management
+
 CONFIG_PM=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
index 38a7c53..2c6e001 100644 (file)
@@ -1 +1,2 @@
+# Help: Enable Rust
 CONFIG_RUST=y
index 6fac5b4..35f4867 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Debugging options for tip tree testing
 CONFIG_X86_DEBUG_FPU=y
 CONFIG_LOCK_STAT=y
 CONFIG_DEBUG_VM=y
index 436f806..6878b9a 100644 (file)
@@ -1,3 +1,5 @@
+# Help: Bootable as a Xen guest
+#
 # global stuff - these enable us to allow some
 # of the not so generic stuff below for xen
 CONFIG_PARAVIRT=y
index 96fc38c..7e0b4dd 100644 (file)
@@ -538,14 +538,12 @@ char *log_buf_addr_get(void)
 {
        return log_buf;
 }
-EXPORT_SYMBOL_GPL(log_buf_addr_get);
 
 /* Return log buffer size */
 u32 log_buf_len_get(void)
 {
        return log_buf_len;
 }
-EXPORT_SYMBOL_GPL(log_buf_len_get);
 
 /*
  * Define how much of the log buffer we could take at maximum. The value
index 45e1761..035b0a4 100644 (file)
@@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
                               vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
+raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
 
 hostprogs      += mktables
 
index a22a05c..0ec534f 100644 (file)
@@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = {
        &raid6_neonx2,
        &raid6_neonx1,
 #endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+       &raid6_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       &raid6_lsx,
+#endif
+#endif
 #if defined(__ia64__)
        &raid6_intx32,
        &raid6_intx16,
@@ -104,6 +112,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #if defined(CONFIG_KERNEL_MODE_NEON)
        &raid6_recov_neon,
 #endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+       &raid6_recov_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       &raid6_recov_lsx,
+#endif
+#endif
        &raid6_recov_intx1,
        NULL
 };
diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h
new file mode 100644 (file)
index 0000000..acfc33c
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * raid6/loongarch.h
+ *
+ * Definitions common to LoongArch RAID-6 code only
+ */
+
+#ifndef _LIB_RAID6_LOONGARCH_H
+#define _LIB_RAID6_LOONGARCH_H
+
+#ifdef __KERNEL__
+
+#include <asm/cpu-features.h>
+#include <asm/fpu.h>
+
+#else /* for user-space testing */
+
+#include <sys/auxv.h>
+
+/* have to supply these defines for glibc 2.37- and musl */
+#ifndef HWCAP_LOONGARCH_LSX
+#define HWCAP_LOONGARCH_LSX    (1 << 4)
+#endif
+#ifndef HWCAP_LOONGARCH_LASX
+#define HWCAP_LOONGARCH_LASX   (1 << 5)
+#endif
+
+#define kernel_fpu_begin()
+#define kernel_fpu_end()
+
+#define cpu_has_lsx    (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX)
+#define cpu_has_lasx   (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX)
+
+#endif /* __KERNEL__ */
+
+#endif /* _LIB_RAID6_LOONGARCH_H */
diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c
new file mode 100644 (file)
index 0000000..aa5d9f9
--- /dev/null
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Based on the generic RAID-6 code (int.uc):
+ *
+ * Copyright 2002-2004 H. Peter Anvin
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * The vector algorithms are currently priority 0, which means the generic
+ * scalar algorithms are not disabled when vector support is present.
+ * This is like the similar LoongArch RAID5 XOR code, with the main reason
+ * repeated here: it cannot be ruled out at this point in time that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+#define NSIZE 16
+
+static int raid6_has_lsx(void) /* availability check referenced by raid6_lsx below */
+{
+       return cpu_has_lsx;     /* nonzero when LSX is usable; user-space builds read AT_HWCAP (loongarch.h) */
+}
+
+static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = disks - 3;         /* Highest data disk */
+       p = dptr[z0+1];         /* XOR parity */
+       q = dptr[z0+2];         /* RS syndrome */
+
+       kernel_fpu_begin();     /* the asm below names $vr0-$vr19 directly */
+
+       /*
+        * $vr0, $vr1, $vr2, $vr3: wp
+        * $vr4, $vr5, $vr6, $vr7: wq
+        * $vr8, $vr9, $vr10, $vr11: wd
+        * $vr12, $vr13, $vr14, $vr15: w2
+        * $vr16, $vr17, $vr18, $vr19: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*4) {  /* 4 x NSIZE = 64 bytes per pass; no tail handling, presumably bytes % 64 == 0 */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+               asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+               asm volatile("vori.b $vr4, $vr0, 0");   /* OR with 0: plain copy of wp into wq */
+               asm volatile("vori.b $vr5, $vr1, 0");
+               asm volatile("vori.b $vr6, $vr2, 0");
+               asm volatile("vori.b $vr7, $vr3, 0");
+               for (z = z0-1; z >= 0; z--) {   /* remaining data disks, highest index first */
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+                       asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+                       /* wp$$ ^= wd$$; (accumulate the XOR parity) */
+                       asm volatile("vxor.v $vr0, $vr0, $vr8");
+                       asm volatile("vxor.v $vr1, $vr1, $vr9");
+                       asm volatile("vxor.v $vr2, $vr2, $vr10");
+                       asm volatile("vxor.v $vr3, $vr3, $vr11");
+                       /* w2$$ = MASK(wq$$); (per-byte signed compare against 0) */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); (per-byte shift left by 1) */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); (keep 0x1d only where the mask is set) */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* w1$$ ^= w2$$; (shifted wq with the 0x1d correction applied) */
+                       asm volatile("vxor.v $vr16, $vr16, $vr12");
+                       asm volatile("vxor.v $vr17, $vr17, $vr13");
+                       asm volatile("vxor.v $vr18, $vr18, $vr14");
+                       asm volatile("vxor.v $vr19, $vr19, $vr15");
+                       /* wq$$ = w1$$ ^ wd$$; (fold this disk's data into wq) */
+                       asm volatile("vxor.v $vr4, $vr16, $vr8");
+                       asm volatile("vxor.v $vr5, $vr17, $vr9");
+                       asm volatile("vxor.v $vr6, $vr18, $vr10");
+                       asm volatile("vxor.v $vr7, $vr19, $vr11");
+               }
+               /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+               asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0]));
+               asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1]));
+               asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2]));
+               asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3]));
+               /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+               asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0]));
+               asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1]));
+               asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2]));
+               asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3]));
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_lsx_xor_syndrome(int disks, int start, int stop,
+                                  size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();     /* the asm below names $vr0-$vr27 directly */
+
+       /*
+        * $vr0, $vr1, $vr2, $vr3: wp
+        * $vr4, $vr5, $vr6, $vr7: wq
+        * $vr8, $vr9, $vr10, $vr11: wd
+        * $vr12, $vr13, $vr14, $vr15: w2
+        * $vr16, $vr17, $vr18, $vr19: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*4) {  /* 4 x NSIZE = 64 bytes per pass; no tail handling, presumably bytes % 64 == 0 */
+               /* P/Q data pages */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+               asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+               asm volatile("vori.b $vr4, $vr0, 0");   /* OR with 0: plain copy of wp into wq */
+               asm volatile("vori.b $vr5, $vr1, 0");
+               asm volatile("vori.b $vr6, $vr2, 0");
+               asm volatile("vori.b $vr7, $vr3, 0");
+               for (z = z0-1; z >= start; z--) {       /* disks stop-1 down to start: data is read */
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+                       asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+                       /* wp$$ ^= wd$$; (accumulate the XOR parity) */
+                       asm volatile("vxor.v $vr0, $vr0, $vr8");
+                       asm volatile("vxor.v $vr1, $vr1, $vr9");
+                       asm volatile("vxor.v $vr2, $vr2, $vr10");
+                       asm volatile("vxor.v $vr3, $vr3, $vr11");
+                       /* w2$$ = MASK(wq$$); (per-byte signed compare against 0) */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); (per-byte shift left by 1) */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); (keep 0x1d only where the mask is set) */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* w1$$ ^= w2$$; (shifted wq with the 0x1d correction applied) */
+                       asm volatile("vxor.v $vr16, $vr16, $vr12");
+                       asm volatile("vxor.v $vr17, $vr17, $vr13");
+                       asm volatile("vxor.v $vr18, $vr18, $vr14");
+                       asm volatile("vxor.v $vr19, $vr19, $vr15");
+                       /* wq$$ = w1$$ ^ wd$$; (fold this disk's data into wq) */
+                       asm volatile("vxor.v $vr4, $vr16, $vr8");
+                       asm volatile("vxor.v $vr5, $vr17, $vr9");
+                       asm volatile("vxor.v $vr6, $vr18, $vr10");
+                       asm volatile("vxor.v $vr7, $vr19, $vr11");
+               }
+
+               /* P/Q left side optimization: no data is loaded, only wq is stepped */
+               for (z = start-1; z >= 0; z--) {
+                       /* w2$$ = MASK(wq$$); (per-byte signed compare against 0) */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); (per-byte shift left by 1) */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); (keep 0x1d only where the mask is set) */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* wq$$ = w1$$ ^ w2$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr12");
+                       asm volatile("vxor.v $vr5, $vr17, $vr13");
+                       asm volatile("vxor.v $vr6, $vr18, $vr14");
+                       asm volatile("vxor.v $vr7, $vr19, $vr15");
+               }
+               /*
+                * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+                * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+                */
+               asm volatile(   /* read-modify-write of P and Q through scratch $vr20-$vr27 */
+                       "vld $vr20, %0\n\t"
+                       "vld $vr21, %1\n\t"
+                       "vld $vr22, %2\n\t"
+                       "vld $vr23, %3\n\t"
+                       "vld $vr24, %4\n\t"
+                       "vld $vr25, %5\n\t"
+                       "vld $vr26, %6\n\t"
+                       "vld $vr27, %7\n\t"
+                       "vxor.v $vr20, $vr20, $vr0\n\t"
+                       "vxor.v $vr21, $vr21, $vr1\n\t"
+                       "vxor.v $vr22, $vr22, $vr2\n\t"
+                       "vxor.v $vr23, $vr23, $vr3\n\t"
+                       "vxor.v $vr24, $vr24, $vr4\n\t"
+                       "vxor.v $vr25, $vr25, $vr5\n\t"
+                       "vxor.v $vr26, $vr26, $vr6\n\t"
+                       "vxor.v $vr27, $vr27, $vr7\n\t"
+                       "vst $vr20, %0\n\t"
+                       "vst $vr21, %1\n\t"
+                       "vst $vr22, %2\n\t"
+                       "vst $vr23, %3\n\t"
+                       "vst $vr24, %4\n\t"
+                       "vst $vr25, %5\n\t"
+                       "vst $vr26, %6\n\t"
+                       "vst $vr27, %7\n\t"
+                       : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+                         "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]),
+                         "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]),
+                         "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3])
+               );
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lsx = {  /* LSX entry; listed in raid6_algos[] under CONFIG_CPU_HAS_LSX */
+       raid6_lsx_gen_syndrome, /* full P/Q generation */
+       raid6_lsx_xor_syndrome, /* P/Q update limited to disks start..stop */
+       raid6_has_lsx,          /* availability check */
+       "lsx",                  /* presumably the algorithm's display name - confirm against struct raid6_calls */
+       .priority = 0 /* see the comment near the top of the file for reason */
+};
+
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+#define NSIZE 32
+
+static int raid6_has_lasx(void)        /* availability check referenced by raid6_lasx below */
+{
+       return cpu_has_lasx;    /* nonzero when LASX is usable; user-space builds read AT_HWCAP (loongarch.h) */
+}
+
+static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = disks - 3;         /* Highest data disk */
+       p = dptr[z0+1];         /* XOR parity */
+       q = dptr[z0+2];         /* RS syndrome */
+
+       kernel_fpu_begin();     /* the asm below names $xr0-$xr9 directly */
+
+       /*
+        * $xr0, $xr1: wp
+        * $xr2, $xr3: wq
+        * $xr4, $xr5: wd
+        * $xr6, $xr7: w2
+        * $xr8, $xr9: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*2) {  /* 2 x NSIZE = 64 bytes per pass; no tail handling, presumably bytes % 64 == 0 */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("xvori.b $xr2, $xr0, 0");  /* OR with 0: plain copy of wp into wq */
+               asm volatile("xvori.b $xr3, $xr1, 0");
+               for (z = z0-1; z >= 0; z--) {   /* remaining data disks, highest index first */
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       /* wp$$ ^= wd$$; (accumulate the XOR parity) */
+                       asm volatile("xvxor.v $xr0, $xr0, $xr4");
+                       asm volatile("xvxor.v $xr1, $xr1, $xr5");
+                       /* w2$$ = MASK(wq$$); (per-byte signed compare against 0) */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); (per-byte shift left by 1) */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); (keep 0x1d only where the mask is set) */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* w1$$ ^= w2$$; (shifted wq with the 0x1d correction applied) */
+                       asm volatile("xvxor.v $xr8, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr9, $xr9, $xr7");
+                       /* wq$$ = w1$$ ^ wd$$; (fold this disk's data into wq) */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr4");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr5");
+               }
+               /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+               asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0]));
+               asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1]));
+               /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+               asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0]));
+               asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1]));
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_lasx_xor_syndrome(int disks, int start, int stop,
+                                   size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();     /* the asm below names $xr0-$xr13 directly */
+
+       /*
+        * $xr0, $xr1: wp
+        * $xr2, $xr3: wq
+        * $xr4, $xr5: wd
+        * $xr6, $xr7: w2
+        * $xr8, $xr9: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*2) {  /* 2 x NSIZE = 64 bytes per pass; no tail handling, presumably bytes % 64 == 0 */
+               /* P/Q data pages */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("xvori.b $xr2, $xr0, 0");  /* OR with 0: plain copy of wp into wq */
+               asm volatile("xvori.b $xr3, $xr1, 0");
+               for (z = z0-1; z >= start; z--) {       /* disks stop-1 down to start: data is read */
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       /* wp$$ ^= wd$$; (accumulate the XOR parity) */
+                       asm volatile("xvxor.v $xr0, $xr0, $xr4");
+                       asm volatile("xvxor.v $xr1, $xr1, $xr5");
+                       /* w2$$ = MASK(wq$$); (per-byte signed compare against 0) */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); (per-byte shift left by 1) */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); (keep 0x1d only where the mask is set) */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* w1$$ ^= w2$$; (shifted wq with the 0x1d correction applied) */
+                       asm volatile("xvxor.v $xr8, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr9, $xr9, $xr7");
+                       /* wq$$ = w1$$ ^ wd$$; (fold this disk's data into wq) */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr4");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr5");
+               }
+
+               /* P/Q left side optimization: no data is loaded, only wq is stepped */
+               for (z = start-1; z >= 0; z--) {
+                       /* w2$$ = MASK(wq$$); (per-byte signed compare against 0) */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); (per-byte shift left by 1) */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); (keep 0x1d only where the mask is set) */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* wq$$ = w1$$ ^ w2$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr7");
+               }
+               /*
+                * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+                * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+                */
+               asm volatile(   /* read-modify-write of P and Q through scratch $xr10-$xr13 */
+                       "xvld $xr10, %0\n\t"
+                       "xvld $xr11, %1\n\t"
+                       "xvld $xr12, %2\n\t"
+                       "xvld $xr13, %3\n\t"
+                       "xvxor.v $xr10, $xr10, $xr0\n\t"
+                       "xvxor.v $xr11, $xr11, $xr1\n\t"
+                       "xvxor.v $xr12, $xr12, $xr2\n\t"
+                       "xvxor.v $xr13, $xr13, $xr3\n\t"
+                       "xvst $xr10, %0\n\t"
+                       "xvst $xr11, %1\n\t"
+                       "xvst $xr12, %2\n\t"
+                       "xvst $xr13, %3\n\t"
+                       : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+                         "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1])
+               );
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lasx = { /* LASX entry; listed in raid6_algos[] under CONFIG_CPU_HAS_LASX */
+       raid6_lasx_gen_syndrome,        /* full P/Q generation */
+       raid6_lasx_xor_syndrome,        /* P/Q update limited to disks start..stop */
+       raid6_has_lasx,                 /* availability check */
+       "lasx",                         /* presumably the algorithm's display name - confirm against struct raid6_calls */
+       .priority = 0 /* see the comment near the top of the file for reason */
+};
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c
new file mode 100644 (file)
index 0000000..94aeac8
--- /dev/null
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Originally based on recov_avx2.c and recov_ssse3.c:
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * Unlike with the syndrome calculation algorithms, there's no boot-time
+ * selection of recovery algorithms by benchmarking, so we have to specify
+ * the priorities and hope the future cores will all have decent vector
+ * support (i.e. no LASX slower than LSX, or even scalar code).
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+static int raid6_has_lsx(void) /* .valid hook of raid6_recov_lsx below */
+{
+       return cpu_has_lsx;     /* nonzero when LSX is usable; user-space builds read AT_HWCAP (loongarch.h) */
+}
+
+static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
+                                 int failb, void **ptrs)
+{
+       u8 *p, *q, *dp, *dq;
+       const u8 *pbmul;        /* P multiplier table for B data */
+       const u8 *qmul;         /* Q multiplier table (for both) */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        */
+       dp = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 2] = dp;
+       dq = (u8 *)ptrs[failb];
+       ptrs[failb] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);    /* writes its P output to dp and its Q output to dq */
+
+       /* Restore pointer table */
+       ptrs[faila] = dp;
+       ptrs[failb] = dq;
+       ptrs[disks - 2] = p;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+       kernel_fpu_begin();     /* the asm below names $vr0-$vr23 directly */
+
+       /*
+        * vr20, vr21: qmul (table for the low nibble, table for the high nibble)
+        * vr22, vr23: pbmul (table for the low nibble, table for the high nibble)
+        */
+       asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+       asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+
+       while (bytes) { /* 64 bytes per pass; only terminates if bytes is a multiple of 64 */
+               /* vr4 - vr7: Q */
+               asm volatile("vld $vr4, %0" : : "m" (q[0]));
+               asm volatile("vld $vr5, %0" : : "m" (q[16]));
+               asm volatile("vld $vr6, %0" : : "m" (q[32]));
+               asm volatile("vld $vr7, %0" : : "m" (q[48]));
+               /* vr4 - vr7: Q + Qxy */
+               asm volatile("vld $vr8, %0" : : "m" (dq[0]));
+               asm volatile("vld $vr9, %0" : : "m" (dq[16]));
+               asm volatile("vld $vr10, %0" : : "m" (dq[32]));
+               asm volatile("vld $vr11, %0" : : "m" (dq[48]));
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+               /* vr0 - vr3: P */
+               asm volatile("vld $vr0, %0" : : "m" (p[0]));
+               asm volatile("vld $vr1, %0" : : "m" (p[16]));
+               asm volatile("vld $vr2, %0" : : "m" (p[32]));
+               asm volatile("vld $vr3, %0" : : "m" (p[48]));
+               /* vr0 - vr3: P + Pxy */
+               asm volatile("vld $vr8, %0" : : "m" (dp[0]));
+               asm volatile("vld $vr9, %0" : : "m" (dp[16]));
+               asm volatile("vld $vr10, %0" : : "m" (dp[32]));
+               asm volatile("vld $vr11, %0" : : "m" (dp[48]));
+               asm volatile("vxor.v $vr0, $vr0, $vr8");
+               asm volatile("vxor.v $vr1, $vr1, $vr9");
+               asm volatile("vxor.v $vr2, $vr2, $vr10");
+               asm volatile("vxor.v $vr3, $vr3, $vr11");
+
+               /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
+               asm volatile("vsrli.b $vr8, $vr4, 4");
+               asm volatile("vsrli.b $vr9, $vr5, 4");
+               asm volatile("vsrli.b $vr10, $vr6, 4");
+               asm volatile("vsrli.b $vr11, $vr7, 4");
+               /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
+               asm volatile("vandi.b $vr4, $vr4, 0x0f");
+               asm volatile("vandi.b $vr5, $vr5, 0x0f");
+               asm volatile("vandi.b $vr6, $vr6, 0x0f");
+               asm volatile("vandi.b $vr7, $vr7, 0x0f");
+               /* lookup from qmul[0] (each nibble is used as a byte index into vr20) */
+               asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
+               asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
+               asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
+               asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
+               /* lookup from qmul[16] (each nibble is used as a byte index into vr21) */
+               asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
+               asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
+               asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
+               asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
+               /* vr16 - vr19: B(Q + Qxy) */
+               asm volatile("vxor.v $vr16, $vr8, $vr4");
+               asm volatile("vxor.v $vr17, $vr9, $vr5");
+               asm volatile("vxor.v $vr18, $vr10, $vr6");
+               asm volatile("vxor.v $vr19, $vr11, $vr7");
+
+               /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
+               asm volatile("vsrli.b $vr4, $vr0, 4");
+               asm volatile("vsrli.b $vr5, $vr1, 4");
+               asm volatile("vsrli.b $vr6, $vr2, 4");
+               asm volatile("vsrli.b $vr7, $vr3, 4");
+               /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
+               asm volatile("vandi.b $vr12, $vr0, 0x0f");
+               asm volatile("vandi.b $vr13, $vr1, 0x0f");
+               asm volatile("vandi.b $vr14, $vr2, 0x0f");
+               asm volatile("vandi.b $vr15, $vr3, 0x0f");
+               /* lookup from pbmul[0] (each nibble is used as a byte index into vr22) */
+               asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
+               asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
+               asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
+               asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
+               /* lookup from pbmul[16] (each nibble is used as a byte index into vr23) */
+               asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
+               asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
+               asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
+               asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
+               /* vr4 - vr7: A(P + Pxy) */
+               asm volatile("vxor.v $vr4, $vr4, $vr12");
+               asm volatile("vxor.v $vr5, $vr5, $vr13");
+               asm volatile("vxor.v $vr6, $vr6, $vr14");
+               asm volatile("vxor.v $vr7, $vr7, $vr15");
+
+               /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx; stored to dq, the failb page */
+               asm volatile("vxor.v $vr4, $vr4, $vr16");
+               asm volatile("vxor.v $vr5, $vr5, $vr17");
+               asm volatile("vxor.v $vr6, $vr6, $vr18");
+               asm volatile("vxor.v $vr7, $vr7, $vr19");
+               asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+               asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+               asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+               asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+               /* vr0 - vr3: P + Pxy + Dx = Dy; stored to dp, the faila page */
+               asm volatile("vxor.v $vr0, $vr0, $vr4");
+               asm volatile("vxor.v $vr1, $vr1, $vr5");
+               asm volatile("vxor.v $vr2, $vr2, $vr6");
+               asm volatile("vxor.v $vr3, $vr3, $vr7");
+               asm volatile("vst $vr0, %0" : "=m" (dp[0]));
+               asm volatile("vst $vr1, %0" : "=m" (dp[16]));
+               asm volatile("vst $vr2, %0" : "=m" (dp[32]));
+               asm volatile("vst $vr3, %0" : "=m" (dp[48]));
+
+               bytes -= 64;    /* size_t: wraps instead of reaching 0 if bytes was not a multiple of 64 */
+               p += 64;
+               q += 64;
+               dp += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
+                                 void **ptrs)
+{
+       u8 *p, *q, *dq;
+       const u8 *qmul;         /* Q multiplier table */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        */
+       dq = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);    /* Q output goes to dq; P output goes to the p page */
+
+       /* Restore pointer table */
+       ptrs[faila] = dq;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+       kernel_fpu_begin();     /* the asm below names $vr0-$vr11, $vr22 and $vr23 directly */
+
+       /* vr22, vr23: qmul (table for the low nibble, table for the high nibble) */
+       asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+
+       while (bytes) { /* 64 bytes per pass; only terminates if bytes is a multiple of 64 */
+               /* vr0 - vr3: P + Dx */
+               asm volatile("vld $vr0, %0" : : "m" (p[0]));
+               asm volatile("vld $vr1, %0" : : "m" (p[16]));
+               asm volatile("vld $vr2, %0" : : "m" (p[32]));
+               asm volatile("vld $vr3, %0" : : "m" (p[48]));
+               /* vr4 - vr7: Qx */
+               asm volatile("vld $vr4, %0" : : "m" (dq[0]));
+               asm volatile("vld $vr5, %0" : : "m" (dq[16]));
+               asm volatile("vld $vr6, %0" : : "m" (dq[32]));
+               asm volatile("vld $vr7, %0" : : "m" (dq[48]));
+               /* vr4 - vr7: Q + Qx */
+               asm volatile("vld $vr8, %0" : : "m" (q[0]));
+               asm volatile("vld $vr9, %0" : : "m" (q[16]));
+               asm volatile("vld $vr10, %0" : : "m" (q[32]));
+               asm volatile("vld $vr11, %0" : : "m" (q[48]));
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+
+               /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
+               asm volatile("vsrli.b $vr8, $vr4, 4");
+               asm volatile("vsrli.b $vr9, $vr5, 4");
+               asm volatile("vsrli.b $vr10, $vr6, 4");
+               asm volatile("vsrli.b $vr11, $vr7, 4");
+               /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
+               asm volatile("vandi.b $vr4, $vr4, 0x0f");
+               asm volatile("vandi.b $vr5, $vr5, 0x0f");
+               asm volatile("vandi.b $vr6, $vr6, 0x0f");
+               asm volatile("vandi.b $vr7, $vr7, 0x0f");
+               /* lookup from qmul[0] (each nibble is used as a byte index into vr22) */
+               asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
+               asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
+               asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
+               asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
+               /* lookup from qmul[16] (each nibble is used as a byte index into vr23) */
+               asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
+               asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
+               asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
+               asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
+               /* vr4 - vr7: qmul(Q + Qx) = Dx; stored to dq, the faila page */
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+               asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+               asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+               asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+               asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+               /* vr0 - vr3: P + Dx + Dx = P; written back over the p page */
+               asm volatile("vxor.v $vr0, $vr0, $vr4");
+               asm volatile("vxor.v $vr1, $vr1, $vr5");
+               asm volatile("vxor.v $vr2, $vr2, $vr6");
+               asm volatile("vxor.v $vr3, $vr3, $vr7");
+               asm volatile("vst $vr0, %0" : "=m" (p[0]));
+               asm volatile("vst $vr1, %0" : "=m" (p[16]));
+               asm volatile("vst $vr2, %0" : "=m" (p[32]));
+               asm volatile("vst $vr3, %0" : "=m" (p[48]));
+
+               bytes -= 64;    /* size_t: wraps instead of reaching 0 if bytes was not a multiple of 64 */
+               p += 64;
+               q += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+/*
+ * Recovery method table for the LSX implementation: ties the two LSX
+ * recovery routines to the raid6_has_lsx availability check.
+ * NOTE(review): presumably the table with the highest .priority among the
+ * valid ones is preferred (the LASX table in this file uses 2) - confirm
+ * against the selection code.
+ */
+const struct raid6_recov_calls raid6_recov_lsx = {
+       .data2 = raid6_2data_recov_lsx,
+       .datap = raid6_datap_recov_lsx,
+       .valid = raid6_has_lsx,
+       .name = "lsx",
+       .priority = 1,
+};
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+/* .valid hook of raid6_recov_lasx: returns the value of cpu_has_lasx. */
+static int raid6_has_lasx(void)
+{
+       return cpu_has_lasx;
+}
+
+/*
+ * Rebuild two failed data blocks, ptrs[faila] and ptrs[failb], with LASX
+ * vector instructions.
+ *
+ * ptrs[disks - 2] and ptrs[disks - 1] are used as the P and Q blocks.  The
+ * syndrome is regenerated with the zero page standing in for both failed
+ * blocks, using the failed blocks' own buffers as scratch space for the
+ * output; ptrs[] is put back as it was before the vector loop runs.  The
+ * loop then processes 64 bytes per pass and writes the results into
+ * ptrs[failb] (through dq) and ptrs[faila] (through dp).
+ *
+ * bytes has to be a multiple of 64: the loop ends only when the remaining
+ * count reaches exactly zero.
+ * NOTE(review): raid6_gfexi[failb - faila] suggests callers must pass
+ * faila < failb - confirm against the callers.
+ */
+static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
+                                  int failb, void **ptrs)
+{
+       u8 *p, *q, *dp, *dq;
+       const u8 *pbmul;        /* P multiplier table for B data */
+       const u8 *qmul;         /* Q multiplier table (for both) */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        */
+       dp = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 2] = dp;
+       dq = (u8 *)ptrs[failb];
+       ptrs[failb] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dp;
+       ptrs[failb] = dq;
+       ptrs[disks - 2] = p;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+       /* Vector registers are used from here until kernel_fpu_end() */
+       kernel_fpu_begin();
+
+       /*
+        * xr20, xr21: qmul
+        * xr22, xr23: pbmul
+        *
+        * Each 16-byte table half is loaded with a 128-bit vld.
+        * NOTE(review): xvreplve0.q presumably copies that low 128-bit
+        * element into the upper half as well - confirm against the
+        * LoongArch vector manual.
+        */
+       asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+       asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+       asm volatile("xvreplve0.q $xr20, $xr20");
+       asm volatile("xvreplve0.q $xr21, $xr21");
+       asm volatile("xvreplve0.q $xr22, $xr22");
+       asm volatile("xvreplve0.q $xr23, $xr23");
+
+       /* One pass handles 64 bytes: two 32-byte vectors per buffer */
+       while (bytes) {
+               /* xr0, xr1: Q */
+               asm volatile("xvld $xr0, %0" : : "m" (q[0]));
+               asm volatile("xvld $xr1, %0" : : "m" (q[32]));
+               /* xr0, xr1: Q + Qxy */
+               asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
+               asm volatile("xvxor.v $xr0, $xr0, $xr4");
+               asm volatile("xvxor.v $xr1, $xr1, $xr5");
+               /* xr2, xr3: P */
+               asm volatile("xvld $xr2, %0" : : "m" (p[0]));
+               asm volatile("xvld $xr3, %0" : : "m" (p[32]));
+               /* xr2, xr3: P + Pxy */
+               asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
+               asm volatile("xvsrli.b $xr4, $xr0, 4");
+               asm volatile("xvsrli.b $xr5, $xr1, 4");
+               /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
+               asm volatile("xvandi.b $xr0, $xr0, 0x0f");
+               asm volatile("xvandi.b $xr1, $xr1, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
+               asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
+               /* lookup from qmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
+               /* xr6, xr7: B(Q + Qxy) */
+               asm volatile("xvxor.v $xr6, $xr4, $xr0");
+               asm volatile("xvxor.v $xr7, $xr5, $xr1");
+
+               /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
+               asm volatile("xvsrli.b $xr4, $xr2, 4");
+               asm volatile("xvsrli.b $xr5, $xr3, 4");
+               /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
+               asm volatile("xvandi.b $xr0, $xr2, 0x0f");
+               asm volatile("xvandi.b $xr1, $xr3, 0x0f");
+               /* lookup from pbmul[0] */
+               asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
+               asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
+               /* lookup from pbmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+               /* xr0, xr1: A(P + Pxy) */
+               asm volatile("xvxor.v $xr0, $xr0, $xr4");
+               asm volatile("xvxor.v $xr1, $xr1, $xr5");
+
+               /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
+               asm volatile("xvxor.v $xr0, $xr0, $xr6");
+               asm volatile("xvxor.v $xr1, $xr1, $xr7");
+
+               /* xr2, xr3: P + Pxy + Dx = Dy */
+               asm volatile("xvxor.v $xr2, $xr2, $xr0");
+               asm volatile("xvxor.v $xr3, $xr3, $xr1");
+
+               /*
+                * The value labelled Dx above is written to dq (ptrs[failb]),
+                * the one labelled Dy to dp (ptrs[faila]).
+                */
+               asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
+               asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
+               asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
+               asm volatile("xvst $xr3, %0" : "=m" (dp[32]));
+
+               /* Move every buffer on to its next 64-byte chunk */
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dp += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+/*
+ * Rebuild one failed data block, ptrs[faila], together with the P block,
+ * using LASX vector instructions.
+ *
+ * ptrs[disks - 2] and ptrs[disks - 1] are used as the P and Q blocks.  The
+ * syndrome is regenerated with the zero page standing in for the failed
+ * block and with the failed block's own buffer in the Q slot; ptrs[] is put
+ * back as it was before the vector loop runs.  The loop then processes 64
+ * bytes per pass, writing the recovered data into ptrs[faila] (through dq)
+ * and the corrected parity into the P buffer.  The Q buffer is only read.
+ *
+ * bytes has to be a multiple of 64: the loop ends only when the remaining
+ * count reaches exactly zero.
+ */
+static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
+                                  void **ptrs)
+{
+       u8 *p, *q, *dq;
+       const u8 *qmul;         /* Q multiplier table */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        */
+       dq = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dq;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+       /* Vector registers are used from here until kernel_fpu_end() */
+       kernel_fpu_begin();
+
+       /*
+        * xr22, xr23: qmul
+        *
+        * Each 16-byte table half is loaded with a 128-bit vld.
+        * NOTE(review): xvreplve0.q presumably copies that low 128-bit
+        * element into the upper half as well - confirm against the
+        * LoongArch vector manual.
+        */
+       asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+       asm volatile("xvreplve0.q $xr22, $xr22");
+       asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+       asm volatile("xvreplve0.q $xr23, $xr23");
+
+       /* One pass handles 64 bytes: two 32-byte vectors per buffer */
+       while (bytes) {
+               /* xr0, xr1: P + Dx */
+               asm volatile("xvld $xr0, %0" : : "m" (p[0]));
+               asm volatile("xvld $xr1, %0" : : "m" (p[32]));
+               /* xr2, xr3: Qx */
+               asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
+               asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
+               /* xr2, xr3: Q + Qx */
+               asm volatile("xvld $xr4, %0" : : "m" (q[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (q[32]));
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
+               asm volatile("xvsrli.b $xr4, $xr2, 4");
+               asm volatile("xvsrli.b $xr5, $xr3, 4");
+               /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
+               asm volatile("xvandi.b $xr2, $xr2, 0x0f");
+               asm volatile("xvandi.b $xr3, $xr3, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
+               asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
+               /* lookup from qmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+               /* xr2, xr3: qmul(Q + Qx) = Dx */
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr0, xr1: P + Dx + Dx = P */
+               asm volatile("xvxor.v $xr0, $xr0, $xr2");
+               asm volatile("xvxor.v $xr1, $xr1, $xr3");
+
+               /* Dx goes to dq (ptrs[faila]); the corrected P goes back to p */
+               asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
+               asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
+               asm volatile("xvst $xr0, %0" : "=m" (p[0]));
+               asm volatile("xvst $xr1, %0" : "=m" (p[32]));
+
+               /* Move every buffer on to its next 64-byte chunk */
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+/*
+ * Recovery method table for the LASX implementation: ties the two LASX
+ * recovery routines to the raid6_has_lasx availability check.
+ * NOTE(review): .priority is 2 here versus 1 for the LSX table, presumably
+ * so that LASX is preferred when both are valid - confirm against the
+ * selection code.
+ */
+const struct raid6_recov_calls raid6_recov_lasx = {
+       .data2 = raid6_2data_recov_lasx,
+       .datap = raid6_datap_recov_lasx,
+       .valid = raid6_has_lasx,
+       .name = "lasx",
+       .priority = 2,
+};
+#endif /* CONFIG_CPU_HAS_LASX */
index 1f693ea..2abe007 100644 (file)
@@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc)
                          gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
 endif
 
+ifeq ($(ARCH),loongarch64)
+        CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1
+        CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' |         \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1)
+        CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' |        \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1)
+endif
+
 ifeq ($(IS_X86),yes)
         OBJS   += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
         CFLAGS += -DCONFIG_X86
@@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes)
         CFLAGS += -DCONFIG_ALTIVEC
         OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
                 vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
+else ifeq ($(ARCH),loongarch64)
+        OBJS += loongarch_simd.o recov_loongarch_simd.o
 endif
 
 .c.o:
index bf6219d..582f531 100644 (file)
  *    bdi.wb->list_lock                (zap_pte_range->set_page_dirty)
  *    ->inode->i_lock          (zap_pte_range->set_page_dirty)
  *    ->private_lock           (zap_pte_range->block_dirty_folio)
- *
- * ->i_mmap_rwsem
- *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
 static void page_cache_delete(struct address_space *mapping,
index dcfec27..89895f3 100644 (file)
@@ -139,6 +139,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
        return 0;
 }
 
+/*
+ * Weak, empty default.  zero_pud_populate() calls this on a page obtained
+ * from early_alloc() before handing that page to pud_populate().
+ * NOTE(review): presumably an architecture supplies a strong definition
+ * when a new PMD table needs non-zero initial contents - confirm.
+ */
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
 static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
                                unsigned long end)
 {
@@ -166,8 +170,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
                                if (!p)
                                        return -ENOMEM;
                        } else {
-                               pud_populate(&init_mm, pud,
-                                       early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+                               p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+                               pmd_init(p);
+                               pud_populate(&init_mm, pud, p);
                        }
                }
                zero_pmd_populate(pud, addr, next);
@@ -176,6 +181,10 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
        return 0;
 }
 
+/*
+ * Weak, empty default.  zero_p4d_populate() calls this on a page obtained
+ * from early_alloc() before handing that page to p4d_populate().
+ * NOTE(review): presumably an architecture supplies a strong definition
+ * when a new PUD table needs non-zero initial contents - confirm.
+ */
+void __weak __meminit pud_init(void *addr)
+{
+}
+
 static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
                                unsigned long end)
 {
@@ -207,8 +216,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
                                if (!p)
                                        return -ENOMEM;
                        } else {
-                               p4d_populate(&init_mm, p4d,
-                                       early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+                               p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+                               pud_init(p);
+                               p4d_populate(&init_mm, p4d, p);
                        }
                }
                zero_pud_populate(p4d, addr, next);
index 2e973b3..f70e3d7 100644 (file)
@@ -291,16 +291,22 @@ struct kasan_stack_ring {
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
+#ifndef __HAVE_ARCH_SHADOW_MAP
 static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
 {
        return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
                << KASAN_SHADOW_SCALE_SHIFT);
 }
+#endif
 
 static __always_inline bool addr_has_metadata(const void *addr)
 {
+#ifdef __HAVE_ARCH_SHADOW_MAP
+       return (kasan_mem_to_shadow((void *)addr) != NULL);
+#else
        return (kasan_reset_tag(addr) >=
                kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
+#endif
 }
 
 /**
index 96fd041..3872528 100644 (file)
@@ -574,13 +574,14 @@ static void rcu_guarded_free(struct rcu_head *h)
  */
 static unsigned long kfence_init_pool(void)
 {
-       unsigned long addr = (unsigned long)__kfence_pool;
+       unsigned long addr;
        struct page *pages;
        int i;
 
        if (!arch_kfence_init_pool())
-               return addr;
+               return (unsigned long)__kfence_pool;
 
+       addr = (unsigned long)__kfence_pool;
        pages = virt_to_page(__kfence_pool);
 
        /*
index 2918150..54c2c90 100644 (file)
@@ -1584,6 +1584,9 @@ static void kmemleak_scan(void)
                for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                        struct page *page = pfn_to_online_page(pfn);
 
+                       if (!(pfn & 63))
+                               cond_resched();
+
                        if (!page)
                                continue;
 
@@ -1594,8 +1597,6 @@ static void kmemleak_scan(void)
                        if (page_count(page) == 0)
                                continue;
                        scan_block(page, page + 1, NULL);
-                       if (!(pfn & 63))
-                               cond_resched();
                }
        }
        put_online_mems();
index 8d6aee0..981af9c 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2925,7 +2925,7 @@ void collect_procs_ksm(struct page *page, struct list_head *to_kill,
                struct anon_vma *av = rmap_item->anon_vma;
 
                anon_vma_lock_read(av);
-               read_lock(&tasklist_lock);
+               rcu_read_lock();
                for_each_process(tsk) {
                        struct anon_vma_chain *vmac;
                        unsigned long addr;
@@ -2944,7 +2944,7 @@ void collect_procs_ksm(struct page *page, struct list_head *to_kill,
                                }
                        }
                }
-               read_unlock(&tasklist_lock);
+               rcu_read_unlock();
                anon_vma_unlock_read(av);
        }
 }
index b29b850..a4d3282 100644 (file)
@@ -5326,7 +5326,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
        INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
        memcg->deferred_split_queue.split_queue_len = 0;
 #endif
-       idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
        lru_gen_init_memcg(memcg);
        return memcg;
 fail:
@@ -5398,14 +5397,27 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
        if (alloc_shrinker_info(memcg))
                goto offline_kmem;
 
-       /* Online state pins memcg ID, memcg ID pins CSS */
-       refcount_set(&memcg->id.ref, 1);
-       css_get(css);
-
        if (unlikely(mem_cgroup_is_root(memcg)))
                queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
                                   FLUSH_TIME);
        lru_gen_online_memcg(memcg);
+
+       /* Online state pins memcg ID, memcg ID pins CSS */
+       refcount_set(&memcg->id.ref, 1);
+       css_get(css);
+
+       /*
+        * Ensure mem_cgroup_from_id() works once we're fully online.
+        *
+        * We could do this earlier and require callers to filter with
+        * css_tryget_online(). But right now there are no users that
+        * need earlier access, and the workingset code relies on the
+        * cgroup tree linkage (mem_cgroup_get_nr_swap_pages()). So
+        * publish it here at the end of onlining. This matches the
+        * regular ID destruction during offlining.
+        */
+       idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+
        return 0;
 offline_kmem:
        memcg_offline_kmem(memcg);
index 1cad190..2dba2cb 100644 (file)
@@ -316,7 +316,7 @@ SYSCALL_DEFINE2(memfd_create,
                return -EINVAL;
 
        if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
-               pr_info_ratelimited(
+               pr_warn_once(
                        "%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
                        current->comm, task_pid_nr(current));
        }
index 881c35e..4d6e43c 100644 (file)
@@ -547,8 +547,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
  * on behalf of the thread group. Return task_struct of the (first found)
  * dedicated thread if found, and return NULL otherwise.
  *
- * We already hold read_lock(&tasklist_lock) in the caller, so we don't
- * have to call rcu_read_lock/unlock() in this function.
+ * We already hold rcu lock in the caller, so we don't have to call
+ * rcu_read_lock/unlock() in this function.
  */
 static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
 {
@@ -609,7 +609,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
                return;
 
        pgoff = page_to_pgoff(page);
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
        for_each_process(tsk) {
                struct anon_vma_chain *vmac;
                struct task_struct *t = task_early_kill(tsk, force_early);
@@ -626,7 +626,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
                        add_to_kill_anon_file(t, page, vma, to_kill);
                }
        }
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
        anon_vma_unlock_read(av);
 }
 
@@ -642,7 +642,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
        pgoff_t pgoff;
 
        i_mmap_lock_read(mapping);
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
        pgoff = page_to_pgoff(page);
        for_each_process(tsk) {
                struct task_struct *t = task_early_kill(tsk, force_early);
@@ -662,7 +662,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
                                add_to_kill_anon_file(t, page, vma, to_kill);
                }
        }
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
        i_mmap_unlock_read(mapping);
 }
 
@@ -685,7 +685,7 @@ static void collect_procs_fsdax(struct page *page,
        struct task_struct *tsk;
 
        i_mmap_lock_read(mapping);
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
        for_each_process(tsk) {
                struct task_struct *t = task_early_kill(tsk, true);
 
@@ -696,7 +696,7 @@ static void collect_procs_fsdax(struct page *page,
                                add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
                }
        }
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
        i_mmap_unlock_read(mapping);
 }
 #endif /* CONFIG_FS_DAX */
@@ -717,7 +717,7 @@ static void collect_procs(struct page *page, struct list_head *tokill,
                collect_procs_file(page, tokill, force_early);
 }
 
-struct hwp_walk {
+struct hwpoison_walk {
        struct to_kill tk;
        unsigned long pfn;
        int flags;
@@ -752,7 +752,7 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
-                                     struct hwp_walk *hwp)
+                                     struct hwpoison_walk *hwp)
 {
        pmd_t pmd = *pmdp;
        unsigned long pfn;
@@ -770,7 +770,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
 }
 #else
 static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
-                                     struct hwp_walk *hwp)
+                                     struct hwpoison_walk *hwp)
 {
        return 0;
 }
@@ -779,7 +779,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
 static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
                              unsigned long end, struct mm_walk *walk)
 {
-       struct hwp_walk *hwp = walk->private;
+       struct hwpoison_walk *hwp = walk->private;
        int ret = 0;
        pte_t *ptep, *mapped_pte;
        spinlock_t *ptl;
@@ -813,7 +813,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
                            unsigned long addr, unsigned long end,
                            struct mm_walk *walk)
 {
-       struct hwp_walk *hwp = walk->private;
+       struct hwpoison_walk *hwp = walk->private;
        pte_t pte = huge_ptep_get(ptep);
        struct hstate *h = hstate_vma(walk->vma);
 
@@ -824,7 +824,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 #define hwpoison_hugetlb_range NULL
 #endif
 
-static const struct mm_walk_ops hwp_walk_ops = {
+static const struct mm_walk_ops hwpoison_walk_ops = {
        .pmd_entry = hwpoison_pte_range,
        .hugetlb_entry = hwpoison_hugetlb_range,
        .walk_lock = PGWALK_RDLOCK,
@@ -847,7 +847,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
                                  int flags)
 {
        int ret;
-       struct hwp_walk priv = {
+       struct hwpoison_walk priv = {
                .pfn = pfn,
        };
        priv.tk.tsk = p;
@@ -856,7 +856,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
                return -EFAULT;
 
        mmap_read_lock(p->mm);
-       ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+       ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops,
                              (void *)&priv);
        if (ret == 1 && priv.tk.addr)
                kill_proc(&priv.tk, pfn, flags);
@@ -1562,7 +1562,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
         * Here we are interested only in user-mapped pages, so skip any
         * other types of pages.
         */
-       if (PageReserved(p) || PageSlab(p) || PageTable(p))
+       if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
                return true;
        if (!(PageLRU(hpage) || PageHuge(p)))
                return true;
@@ -2533,7 +2533,8 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
-       if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+       if (folio_test_slab(folio) || PageTable(&folio->page) ||
+           folio_test_reserved(folio) || PageOffline(&folio->page))
                goto unlock_mutex;
 
        /*
index 4524598..0c5be12 100644 (file)
@@ -2641,12 +2641,6 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
        do {
                page = NULL;
                spin_lock_irqsave(&zone->lock, flags);
-               /*
-                * order-0 request can reach here when the pcplist is skipped
-                * due to non-CMA allocation context. HIGHATOMIC area is
-                * reserved for high-order atomic allocation, so order-0
-                * request should skip it.
-                */
                if (alloc_flags & ALLOC_HIGHATOMIC)
                        page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
                if (!page) {
@@ -2780,17 +2774,10 @@ struct page *rmqueue(struct zone *preferred_zone,
        WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
 
        if (likely(pcp_allowed_order(order))) {
-               /*
-                * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
-                * we need to skip it when CMA area isn't allowed.
-                */
-               if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
-                               migratetype != MIGRATE_MOVABLE) {
-                       page = rmqueue_pcplist(preferred_zone, zone, order,
-                                       migratetype, alloc_flags);
-                       if (likely(page))
-                               goto out;
-               }
+               page = rmqueue_pcplist(preferred_zone, zone, order,
+                                      migratetype, alloc_flags);
+               if (likely(page))
+                       goto out;
        }
 
        page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
index f08b655..8cbbfd3 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -1068,7 +1068,9 @@ void mem_dump_obj(void *object)
        if (vmalloc_dump_obj(object))
                return;
 
-       if (virt_addr_valid(object))
+       if (is_vmalloc_addr(object))
+               type = "vmalloc memory";
+       else if (virt_addr_valid(object))
                type = "non-slab/vmalloc memory";
        else if (object == NULL)
                type = "NULL pointer";
index 228a4a5..ef8599d 100644 (file)
@@ -4278,14 +4278,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 #ifdef CONFIG_PRINTK
 bool vmalloc_dump_obj(void *object)
 {
-       struct vm_struct *vm;
        void *objp = (void *)PAGE_ALIGN((unsigned long)object);
+       const void *caller;
+       struct vm_struct *vm;
+       struct vmap_area *va;
+       unsigned long addr;
+       unsigned int nr_pages;
 
-       vm = find_vm_area(objp);
-       if (!vm)
+       if (!spin_trylock(&vmap_area_lock))
+               return false;
+       va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
+       if (!va) {
+               spin_unlock(&vmap_area_lock);
                return false;
+       }
+
+       vm = va->vm;
+       if (!vm) {
+               spin_unlock(&vmap_area_lock);
+               return false;
+       }
+       addr = (unsigned long)vm->addr;
+       caller = vm->caller;
+       nr_pages = vm->nr_pages;
+       spin_unlock(&vmap_area_lock);
        pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
-               vm->nr_pages, (unsigned long)vm->addr, vm->caller);
+               nr_pages, addr, caller);
        return true;
 }
 #endif
index 57a7a64..0841f8d 100644 (file)
@@ -543,6 +543,7 @@ struct bpf_fentry_test_t {
 
 int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
 {
+       asm volatile ("");
        return (long)arg;
 }
 
index feaec4a..b28c976 100644 (file)
@@ -974,6 +974,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
        struct sock_exterr_skb *serr;
        struct sk_buff *skb;
        char *state = "UNK";
+       u32 tsflags;
        int err;
 
        jsk = j1939_sk(sk);
@@ -981,13 +982,14 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
        if (!(jsk->state & J1939_SOCK_ERRQUEUE))
                return;
 
+       tsflags = READ_ONCE(sk->sk_tsflags);
        switch (type) {
        case J1939_ERRQUEUE_TX_ACK:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+               if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
                        return;
                break;
        case J1939_ERRQUEUE_TX_SCHED:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+               if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED))
                        return;
                break;
        case J1939_ERRQUEUE_TX_ABORT:
@@ -997,7 +999,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
        case J1939_ERRQUEUE_RX_DPO:
                fallthrough;
        case J1939_ERRQUEUE_RX_ABORT:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+               if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
                        return;
                break;
        default:
@@ -1054,7 +1056,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
        }
 
        serr->opt_stats = true;
-       if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+       if (tsflags & SOF_TIMESTAMPING_OPT_ID)
                serr->ee.ee_data = session->tskey;
 
        netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
index 5eb4898..10a41cd 100644 (file)
@@ -969,6 +969,62 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
        return true;
 }
 
+/*
+ * Set up @cursor for a CEPH_MSG_DATA_ITER data item: take a by-value copy
+ * of the item's iov_iter, truncate that copy to @length bytes and record
+ * the resulting byte count as the cursor's residual.
+ */
+static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor,
+                                          size_t length)
+{
+       struct ceph_msg_data *data = cursor->data;
+
+       /* Work on a copy so the iterator stored in the data item is untouched */
+       cursor->iov_iter = data->iter;
+       cursor->lastlen = 0;
+       iov_iter_truncate(&cursor->iov_iter, length);
+       cursor->resid = iov_iter_count(&cursor->iov_iter);
+}
+
+/*
+ * Return the page holding the next piece of data for an iov_iter-backed
+ * cursor.  The offset into that page is stored in @page_offset and the
+ * usable byte count, capped at the cursor's residual, in @length.
+ */
+static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor,
+                                           size_t *page_offset, size_t *length)
+{
+       struct page *page;
+       ssize_t len;
+
+       /*
+        * Rewind by whatever is left of the previous piece.
+        * NOTE(review): this presumably undoes the advance performed by the
+        * earlier iov_iter_get_pages2() call for bytes not yet consumed via
+        * ceph_msg_data_iter_advance() - confirm.
+        */
+       if (cursor->lastlen)
+               iov_iter_revert(&cursor->iov_iter, cursor->lastlen);
+
+       /* Ask for a single page, at most PAGE_SIZE bytes */
+       len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE,
+                                 1, page_offset);
+       BUG_ON(len < 0);
+
+       cursor->lastlen = len;
+
+       /*
+        * FIXME: The assumption is that the pages represented by the iov_iter
+        *        are pinned, with the references held by the upper-level
+        *        callers, or by virtue of being under writeback. Eventually,
+        *        we'll get an iov_iter_get_pages2 variant that doesn't take
+        *        page refs. Until then, just put the page ref.
+        */
+       VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page);
+       put_page(page);
+
+       *length = min_t(size_t, len, cursor->resid);
+       return page;
+}
+
+/*
+ * Consume @bytes from an iov_iter-backed cursor.  @bytes must not exceed
+ * the cursor's residual.  Returns true while the residual is still
+ * non-zero after the update.
+ */
+static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor,
+                                      size_t bytes)
+{
+       BUG_ON(bytes > cursor->resid);
+       cursor->resid -= bytes;
+
+       if (bytes < cursor->lastlen) {
+               /* Only part of the last piece was used: just shrink it */
+               cursor->lastlen -= bytes;
+       } else {
+               /* Last piece fully used: move the iterator by the excess */
+               iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen);
+               cursor->lastlen = 0;
+       }
+
+       return cursor->resid;
+}
+
 /*
  * Message data is handled (sent or received) in pieces, where each
  * piece resides on a single page.  The network layer might not
@@ -996,6 +1052,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
        case CEPH_MSG_DATA_BVECS:
                ceph_msg_data_bvecs_cursor_init(cursor, length);
                break;
+       case CEPH_MSG_DATA_ITER:
+               ceph_msg_data_iter_cursor_init(cursor, length);
+               break;
        case CEPH_MSG_DATA_NONE:
        default:
                /* BUG(); */
@@ -1013,6 +1072,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
 
        cursor->total_resid = length;
        cursor->data = msg->data;
+       cursor->sr_resid = 0;
 
        __ceph_msg_data_cursor_init(cursor);
 }
@@ -1042,6 +1102,9 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
        case CEPH_MSG_DATA_BVECS:
                page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
                break;
+       case CEPH_MSG_DATA_ITER:
+               page = ceph_msg_data_iter_next(cursor, page_offset, length);
+               break;
        case CEPH_MSG_DATA_NONE:
        default:
                page = NULL;
@@ -1080,6 +1143,9 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes)
        case CEPH_MSG_DATA_BVECS:
                new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
                break;
+       case CEPH_MSG_DATA_ITER:
+               new_piece = ceph_msg_data_iter_advance(cursor, bytes);
+               break;
        case CEPH_MSG_DATA_NONE:
        default:
                BUG();
@@ -1879,6 +1945,18 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 }
 EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
 
+/*
+ * Attach an iov_iter as a data item of @msg.  The iterator is copied by
+ * value into the new data item, and the message's data_length grows by the
+ * number of bytes the iterator covers.
+ */
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+                           struct iov_iter *iter)
+{
+       struct ceph_msg_data *item = ceph_msg_data_add(msg);
+
+       item->iter = *iter;
+       item->type = CEPH_MSG_DATA_ITER;
+       msg->data_length += iov_iter_count(&item->iter);
+}
+
 /*
  * construct a new message with given type, size
  * the new msg has a ref count of 1.
index 3d57bb4..f9a50d7 100644 (file)
@@ -159,9 +159,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
 
 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
 {
-       /* Initialize data cursor */
-
-       ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+       /* Initialize data cursor if it's not a sparse read */
+       if (!msg->sparse_read)
+               ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
 }
 
 /*
@@ -960,9 +960,9 @@ static void process_ack(struct ceph_connection *con)
        prepare_read_tag(con);
 }
 
-static int read_partial_message_section(struct ceph_connection *con,
-                                       struct kvec *section,
-                                       unsigned int sec_len, u32 *crc)
+static int read_partial_message_chunk(struct ceph_connection *con,
+                                     struct kvec *section,
+                                     unsigned int sec_len, u32 *crc)
 {
        int ret, left;
 
@@ -978,11 +978,91 @@ static int read_partial_message_section(struct ceph_connection *con,
                section->iov_len += ret;
        }
        if (section->iov_len == sec_len)
-               *crc = crc32c(0, section->iov_base, section->iov_len);
+               *crc = crc32c(*crc, section->iov_base, section->iov_len);
 
        return 1;
 }
 
+/*
+ * Read one message section and compute its CRC from scratch: *crc is reset
+ * to 0 and the work is delegated to read_partial_message_chunk(), whose
+ * return value is passed through unchanged.
+ */
+static inline int read_partial_message_section(struct ceph_connection *con,
+                                              struct kvec *section,
+                                              unsigned int sec_len, u32 *crc)
+{
+       *crc = 0;
+       return read_partial_message_chunk(con, section, sec_len, crc);
+}
+
+/*
+ * Receive the rest of the current sparse-read extent (cursor->sr_resid
+ * bytes) from the socket into the message's data pages, one piece at a
+ * time.
+ *
+ * With the RXBOUNCE option each piece is received into con->bounce_page
+ * (allocated here on first use) and copied to the destination page only
+ * after the CRC has been updated from the bounce page.
+ *
+ * *crc is updated with every piece received.  Returns 1 once sr_resid has
+ * dropped to 0, -ENOMEM if the bounce page cannot be allocated, or the
+ * <= 0 value returned by ceph_tcp_recvpage().
+ */
+static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+{
+       struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+       bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
+
+       if (do_bounce && unlikely(!con->bounce_page)) {
+               con->bounce_page = alloc_page(GFP_NOIO);
+               if (!con->bounce_page) {
+                       pr_err("failed to allocate bounce page\n");
+                       return -ENOMEM;
+               }
+       }
+
+       while (cursor->sr_resid > 0) {
+               struct page *page, *rpage;
+               size_t off, len;
+               int ret;
+
+               page = ceph_msg_data_next(cursor, &off, &len);
+               /* receive target: the bounce page if bouncing, else the data page */
+               rpage = do_bounce ? con->bounce_page : page;
+
+               /* clamp to what remains in extent */
+               len = min_t(int, len, cursor->sr_resid);
+               ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len);
+               if (ret <= 0)
+                       return ret;
+               *crc = ceph_crc32c_page(*crc, rpage, off, ret);
+               ceph_msg_data_advance(cursor, (size_t)ret);
+               cursor->sr_resid -= ret;
+               /* bounce page uses the same in-page offset as the destination */
+               if (do_bounce)
+                       memcpy_page(page, off, rpage, off, ret);
+       }
+       return 1;
+}
+
+/*
+ * Drive a sparse read using the con->v1 receive state.
+ *
+ * Each loop iteration first finishes whatever is outstanding -- the buffer
+ * described by con->v1.in_sr_kvec if one is set, otherwise the remainder
+ * of the current extent (cursor->sr_resid) -- and then asks
+ * con->ops->sparse_read() what to receive next.  The value it returns is
+ * stored in con->v1.in_sr_len and the loop continues while it is > 0.
+ *
+ * The running CRC is taken from con->in_data_crc unless the NOCRC option
+ * is set, and is written back on every exit path.
+ *
+ * Returns the <= 0 value of a receive helper that could not complete, a
+ * negative value returned by ->sparse_read(), or 1 once ->sparse_read()
+ * returns 0.
+ */
+static int read_sparse_msg_data(struct ceph_connection *con)
+{
+       struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+       bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
+       u32 crc = 0;
+       int ret = 1;
+
+       if (do_datacrc)
+               crc = con->in_data_crc;
+
+       do {
+               if (con->v1.in_sr_kvec.iov_base)
+                       ret = read_partial_message_chunk(con,
+                                                        &con->v1.in_sr_kvec,
+                                                        con->v1.in_sr_len,
+                                                        &crc);
+               else if (cursor->sr_resid > 0)
+                       ret = read_sparse_msg_extent(con, &crc);
+
+               if (ret <= 0) {
+                       if (do_datacrc)
+                               con->in_data_crc = crc;
+                       return ret;
+               }
+
+               /* previous step is complete: clear the kvec before asking for more */
+               memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
+               ret = con->ops->sparse_read(con, cursor,
+                               (char **)&con->v1.in_sr_kvec.iov_base);
+               con->v1.in_sr_len = ret;
+       } while (ret > 0);
+
+       if (do_datacrc)
+               con->in_data_crc = crc;
+
+       return ret < 0 ? ret : 1;  /* must return > 0 to indicate success */
+}
+
 static int read_partial_msg_data(struct ceph_connection *con)
 {
        struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
@@ -1173,7 +1253,9 @@ static int read_partial_message(struct ceph_connection *con)
                if (!m->num_data_items)
                        return -EIO;
 
-               if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+               if (m->sparse_read)
+                       ret = read_sparse_msg_data(con);
+               else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
                        ret = read_partial_msg_data_bounce(con);
                else
                        ret = read_partial_msg_data(con);
index 1df1d29..d09a39f 100644 (file)
@@ -8,9 +8,9 @@
 #include <linux/ceph/ceph_debug.h>
 
 #include <crypto/aead.h>
-#include <crypto/algapi.h>  /* for crypto_memneq() */
 #include <crypto/hash.h>
 #include <crypto/sha2.h>
+#include <crypto/utils.h>
 #include <linux/bvec.h>
 #include <linux/crc32c.h>
 #include <linux/net.h>
 #define FRAME_LATE_STATUS_COMPLETE     0xe
 #define FRAME_LATE_STATUS_ABORTED_MASK 0xf
 
-#define IN_S_HANDLE_PREAMBLE           1
-#define IN_S_HANDLE_CONTROL            2
-#define IN_S_HANDLE_CONTROL_REMAINDER  3
-#define IN_S_PREPARE_READ_DATA         4
-#define IN_S_PREPARE_READ_DATA_CONT    5
-#define IN_S_PREPARE_READ_ENC_PAGE     6
-#define IN_S_HANDLE_EPILOGUE           7
-#define IN_S_FINISH_SKIP               8
+#define IN_S_HANDLE_PREAMBLE                   1
+#define IN_S_HANDLE_CONTROL                    2
+#define IN_S_HANDLE_CONTROL_REMAINDER          3
+#define IN_S_PREPARE_READ_DATA                 4
+#define IN_S_PREPARE_READ_DATA_CONT            5
+#define IN_S_PREPARE_READ_ENC_PAGE             6
+#define IN_S_PREPARE_SPARSE_DATA               7
+#define IN_S_PREPARE_SPARSE_DATA_CONT          8
+#define IN_S_HANDLE_EPILOGUE                   9
+#define IN_S_FINISH_SKIP                       10
 
 #define OUT_S_QUEUE_DATA               1
 #define OUT_S_QUEUE_DATA_CONT          2
@@ -967,12 +969,48 @@ static void init_sgs_cursor(struct scatterlist **sg,
        }
 }
 
+/**
+ * init_sgs_pages - map a byte range of a page array into a scatterlist
+ * @sg:                in/out scatterlist position, advanced past every entry set
+ * @pages:     page array holding the data
+ * @dpos:      byte offset into the array at which the range starts
+ * @dlen:      length of the range in bytes
+ * @pad:       buffer for the trailing padding entry, when one is needed
+ *
+ * One scatterlist entry is emitted per page touched by the range; only the
+ * first entry can start at a non-zero page offset.  If need_padding(@dlen)
+ * holds, a final entry of padding_len(@dlen) bytes pointing at @pad follows.
+ */
+static void init_sgs_pages(struct scatterlist **sg, struct page **pages,
+                          int dpos, int dlen, u8 *pad)
+{
+       int pgidx = dpos >> PAGE_SHIFT;
+       int pgoff = offset_in_page(dpos);
+       int left = dlen;
+
+       for (;;) {
+               int chunk = min(left, (int)PAGE_SIZE - pgoff);
+
+               sg_set_page(*sg, pages[pgidx], chunk, pgoff);
+               *sg = sg_next(*sg);
+
+               /* every page after the first is used from its start */
+               pgoff = 0;
+               pgidx++;
+               left -= chunk;
+               if (!left)
+                       break;
+       }
+
+       if (!need_padding(dlen))
+               return;
+
+       sg_set_buf(*sg, pad, padding_len(dlen));
+       *sg = sg_next(*sg);
+}
+
 static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
                             u8 *front_pad, u8 *middle_pad, u8 *data_pad,
-                            void *epilogue, bool add_tag)
+                            void *epilogue, struct page **pages, int dpos,
+                            bool add_tag)
 {
        struct ceph_msg_data_cursor cursor;
        struct scatterlist *cur_sg;
+       int dlen = data_len(msg);
        int sg_cnt;
        int ret;
 
@@ -986,9 +1024,15 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
        if (middle_len(msg))
                sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
                                      middle_len(msg));
-       if (data_len(msg)) {
-               ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
-               sg_cnt += calc_sg_cnt_cursor(&cursor);
+       if (dlen) {
+               if (pages) {
+                       sg_cnt += calc_pages_for(dpos, dlen);
+                       if (need_padding(dlen))
+                               sg_cnt++;
+               } else {
+                       ceph_msg_data_cursor_init(&cursor, msg, dlen);
+                       sg_cnt += calc_sg_cnt_cursor(&cursor);
+               }
        }
 
        ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
@@ -1002,9 +1046,13 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
        if (middle_len(msg))
                init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
                         middle_pad);
-       if (data_len(msg)) {
-               ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
-               init_sgs_cursor(&cur_sg, &cursor, data_pad);
+       if (dlen) {
+               if (pages) {
+                       init_sgs_pages(&cur_sg, pages, dpos, dlen, data_pad);
+               } else {
+                       ceph_msg_data_cursor_init(&cursor, msg, dlen);
+                       init_sgs_cursor(&cur_sg, &cursor, data_pad);
+               }
        }
 
        WARN_ON(!sg_is_last(cur_sg));
@@ -1039,10 +1087,53 @@ static int decrypt_control_remainder(struct ceph_connection *con)
                         padded_len(rem_len) + CEPH_GCM_TAG_LEN);
 }
 
+/*
+ * Process sparse read data that lives in a buffer.
+ *
+ * @con:   connection whose ->sparse_read() op and v2.in_cursor are used
+ * @pages: page vector holding the already received data (decrypt_tail()
+ *         passes con->v2.in_enc_pages)
+ * @spos:  byte position within @pages at which the data starts
+ *
+ * Repeatedly asks con->ops->sparse_read() for the next chunk length and
+ * copies that many bytes out of @pages: into the buffer it handed back if
+ * there is one, otherwise into the message's data pages via the cursor.
+ *
+ * Returns the first <= 0 value returned by ->sparse_read().
+ */
+static int process_v2_sparse_read(struct ceph_connection *con,
+                                 struct page **pages, int spos)
+{
+       struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+       int ret;
+
+       for (;;) {
+               char *buf = NULL;
+
+               ret = con->ops->sparse_read(con, cursor, &buf);
+               if (ret <= 0)
+                       return ret;
+
+               dout("%s: sparse_read return %x buf %p\n", __func__, ret, buf);
+
+               do {
+                       int idx = spos >> PAGE_SHIFT;
+                       int soff = offset_in_page(spos);
+                       /* honour the caller-supplied page vector */
+                       struct page *spage = pages[idx];
+                       int len = min_t(int, ret, PAGE_SIZE - soff);
+
+                       if (buf) {
+                               memcpy_from_page(buf, spage, soff, len);
+                               buf += len;
+                       } else {
+                               struct bio_vec bv;
+
+                               /* never copy past the current destination bvec */
+                               get_bvec_at(cursor, &bv);
+                               len = min_t(int, len, bv.bv_len);
+                               memcpy_page(bv.bv_page, bv.bv_offset,
+                                           spage, soff, len);
+                               ceph_msg_data_advance(cursor, len);
+                       }
+                       spos += len;
+                       ret -= len;
+               } while (ret);
+       }
+}
+
 static int decrypt_tail(struct ceph_connection *con)
 {
        struct sg_table enc_sgt = {};
        struct sg_table sgt = {};
+       struct page **pages = NULL;
+       bool sparse = con->in_msg->sparse_read;
+       int dpos = 0;
        int tail_len;
        int ret;
 
@@ -1053,9 +1144,14 @@ static int decrypt_tail(struct ceph_connection *con)
        if (ret)
                goto out;
 
+       if (sparse) {
+               dpos = padded_len(front_len(con->in_msg) + padded_len(middle_len(con->in_msg)));
+               pages = con->v2.in_enc_pages;
+       }
+
        ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
-                       MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
-                       con->v2.in_buf, true);
+                               MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
+                               con->v2.in_buf, pages, dpos, true);
        if (ret)
                goto out;
 
@@ -1065,6 +1161,12 @@ static int decrypt_tail(struct ceph_connection *con)
        if (ret)
                goto out;
 
+       if (sparse && data_len(con->in_msg)) {
+               ret = process_v2_sparse_read(con, con->v2.in_enc_pages, dpos);
+               if (ret)
+                       goto out;
+       }
+
        WARN_ON(!con->v2.in_enc_page_cnt);
        ceph_release_page_vector(con->v2.in_enc_pages,
                                 con->v2.in_enc_page_cnt);
@@ -1588,7 +1690,7 @@ static int prepare_message_secure(struct ceph_connection *con)
 
        encode_epilogue_secure(con, false);
        ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
-                               &con->v2.out_epil, false);
+                               &con->v2.out_epil, NULL, 0, false);
        if (ret)
                goto out;
 
@@ -1825,6 +1927,123 @@ static void prepare_read_data_cont(struct ceph_connection *con)
        con->v2.in_state = IN_S_HANDLE_EPILOGUE;
 }
 
+/*
+ * Set up the next receive of a sparse read (IN_S_PREPARE_SPARSE_DATA_CONT).
+ *
+ * First accounts for what the previous receive delivered:
+ *  - bvec in_iter: fold the piece into con->in_data_crc (copying it out of
+ *    the bounce page first under RXBOUNCE), advance the cursor, and if the
+ *    current extent still has bytes left (cursor->sr_resid) queue the next
+ *    piece and return 0;
+ *  - kvec in_iter: fold the single received kvec, if any, into the CRC;
+ *  - any other iterator type: return -EIO.
+ *
+ * Then calls con->ops->sparse_read() for the next step:
+ *  - < 0: returned as is;
+ *  - 0: queue the plain epilogue, switch to IN_S_HANDLE_EPILOGUE, return 0;
+ *  - > 0 with a buffer: queue a kvec of that many bytes, return 0;
+ *  - > 0 without a buffer: queue the first bvec piece of the extent
+ *    (through the bounce page under RXBOUNCE) and return that value.
+ *
+ * Also returns -EIO if the extent is larger than cursor->total_resid and
+ * -ENOMEM if the bounce page cannot be allocated.
+ *
+ * NOTE(review): the last path returns a positive value where the other
+ * success paths return 0 -- confirm the caller treats both the same.
+ */
+static int prepare_sparse_read_cont(struct ceph_connection *con)
+{
+       int ret;
+       struct bio_vec bv;
+       char *buf = NULL;
+       struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+
+       WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT);
+
+       if (iov_iter_is_bvec(&con->v2.in_iter)) {
+               if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+                       con->in_data_crc = crc32c(con->in_data_crc,
+                                                 page_address(con->bounce_page),
+                                                 con->v2.in_bvec.bv_len);
+                       /* move the received bytes to their real destination */
+                       get_bvec_at(cursor, &bv);
+                       memcpy_to_page(bv.bv_page, bv.bv_offset,
+                                      page_address(con->bounce_page),
+                                      con->v2.in_bvec.bv_len);
+               } else {
+                       con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
+                                                           con->v2.in_bvec.bv_page,
+                                                           con->v2.in_bvec.bv_offset,
+                                                           con->v2.in_bvec.bv_len);
+               }
+
+               ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len);
+               cursor->sr_resid -= con->v2.in_bvec.bv_len;
+               dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__,
+                    con->v2.in_bvec.bv_len, cursor->sr_resid);
+               WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid);
+               if (cursor->sr_resid) {
+                       /* extent not finished: queue its next piece */
+                       get_bvec_at(cursor, &bv);
+                       if (bv.bv_len > cursor->sr_resid)
+                               bv.bv_len = cursor->sr_resid;
+                       if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+                               bv.bv_page = con->bounce_page;
+                               bv.bv_offset = 0;
+                       }
+                       set_in_bvec(con, &bv);
+                       con->v2.data_len_remain -= bv.bv_len;
+                       return 0;
+               }
+       } else if (iov_iter_is_kvec(&con->v2.in_iter)) {
+               /* On first call, we have no kvec so don't compute crc */
+               if (con->v2.in_kvec_cnt) {
+                       WARN_ON_ONCE(con->v2.in_kvec_cnt > 1);
+                       con->in_data_crc = crc32c(con->in_data_crc,
+                                                 con->v2.in_kvecs[0].iov_base,
+                                                 con->v2.in_kvecs[0].iov_len);
+               }
+       } else {
+               return -EIO;
+       }
+
+       /* get next extent */
+       ret = con->ops->sparse_read(con, cursor, &buf);
+       if (ret <= 0) {
+               if (ret < 0)
+                       return ret;
+
+               /* ret == 0: move on to the epilogue */
+               reset_in_kvecs(con);
+               add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+               con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+               return 0;
+       }
+
+       if (buf) {
+               /* receive into buffer */
+               reset_in_kvecs(con);
+               add_in_kvec(con, buf, ret);
+               con->v2.data_len_remain -= ret;
+               return 0;
+       }
+
+       if (ret > cursor->total_resid) {
+               pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n",
+                       __func__, ret, cursor->total_resid, cursor->resid);
+               return -EIO;
+       }
+       get_bvec_at(cursor, &bv);
+       if (bv.bv_len > cursor->sr_resid)
+               bv.bv_len = cursor->sr_resid;
+       if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+               if (unlikely(!con->bounce_page)) {
+                       con->bounce_page = alloc_page(GFP_NOIO);
+                       if (!con->bounce_page) {
+                               pr_err("failed to allocate bounce page\n");
+                               return -ENOMEM;
+                       }
+               }
+
+               bv.bv_page = con->bounce_page;
+               bv.bv_offset = 0;
+       }
+       set_in_bvec(con, &bv);
+       /*
+        * NOTE(review): ret is subtracted here for the first piece of the
+        * extent, while the continuation path above subtracts bv.bv_len again
+        * for each later piece -- confirm data_len_remain accounting.
+        */
+       con->v2.data_len_remain -= ret;
+       return ret;
+}
+
+/*
+ * Begin receiving the data portion of a sparse-read message
+ * (IN_S_PREPARE_SPARSE_DATA).
+ *
+ * Fails with -EOPNOTSUPP (and a one-time warning) when the connection has
+ * no ->sparse_read op.  Otherwise switches to
+ * IN_S_PREPARE_SPARSE_DATA_CONT, records the message's data length in
+ * con->v2.data_len_remain and returns whatever prepare_sparse_read_cont()
+ * returns for the first receive.
+ */
+static int prepare_sparse_read_data(struct ceph_connection *con)
+{
+       struct ceph_msg *msg = con->in_msg;
+
+       dout("%s: starting sparse read\n", __func__);
+
+       if (WARN_ON_ONCE(!con->ops->sparse_read))
+               return -EOPNOTSUPP;
+
+       /* the running data CRC is only seeded on non-secure connections */
+       if (!con_secure(con))
+               con->in_data_crc = -1;
+
+       reset_in_kvecs(con);
+       con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
+       con->v2.data_len_remain = data_len(msg);
+       return prepare_sparse_read_cont(con);
+}
+
 static int prepare_read_tail_plain(struct ceph_connection *con)
 {
        struct ceph_msg *msg = con->in_msg;
@@ -1845,7 +2064,10 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
        }
 
        if (data_len(msg)) {
-               con->v2.in_state = IN_S_PREPARE_READ_DATA;
+               if (msg->sparse_read)
+                       con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
+               else
+                       con->v2.in_state = IN_S_PREPARE_READ_DATA;
        } else {
                add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
                con->v2.in_state = IN_S_HANDLE_EPILOGUE;
@@ -2898,6 +3120,12 @@ static int populate_in_iter(struct ceph_connection *con)
                        prepare_read_enc_page(con);
                        ret = 0;
                        break;
+               case IN_S_PREPARE_SPARSE_DATA:
+                       ret = prepare_sparse_read_data(con);
+                       break;
+               case IN_S_PREPARE_SPARSE_DATA_CONT:
+                       ret = prepare_sparse_read_cont(con);
+                       break;
                case IN_S_HANDLE_EPILOGUE:
                        ret = handle_epilogue(con);
                        break;
@@ -3489,6 +3717,23 @@ static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
        con->v2.in_state = IN_S_FINISH_SKIP;
 }
 
+/*
+ * Revoke an incoming message while in IN_S_PREPARE_SPARSE_DATA_CONT.
+ *
+ * Whatever is still pending in con->v2.in_iter, plus the data accounted in
+ * con->v2.data_len_remain and the plain epilogue, is turned into a skip,
+ * and the state moves to IN_S_FINISH_SKIP.  Warns if the connection is
+ * secure, the message has no data, or in_iter is not a bvec iterator.
+ */
+static void revoke_at_prepare_sparse_data(struct ceph_connection *con)
+{
+       int resid;  /* current piece of data */
+       int remaining;
+
+       WARN_ON(con_secure(con));
+       WARN_ON(!data_len(con->in_msg));
+       WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+       resid = iov_iter_count(&con->v2.in_iter);
+       dout("%s con %p resid %d\n", __func__, con, resid);
+
+       remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain;
+       /* drop the pending bytes from in_iter; they are skipped instead */
+       con->v2.in_iter.count -= resid;
+       set_in_skip(con, resid + remaining);
+       con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
 static void revoke_at_handle_epilogue(struct ceph_connection *con)
 {
        int resid;
@@ -3505,6 +3750,7 @@ static void revoke_at_handle_epilogue(struct ceph_connection *con)
 void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
 {
        switch (con->v2.in_state) {
+       case IN_S_PREPARE_SPARSE_DATA:
        case IN_S_PREPARE_READ_DATA:
                revoke_at_prepare_read_data(con);
                break;
@@ -3514,6 +3760,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
        case IN_S_PREPARE_READ_ENC_PAGE:
                revoke_at_prepare_read_enc_page(con);
                break;
+       case IN_S_PREPARE_SPARSE_DATA_CONT:
+               revoke_at_prepare_sparse_data(con);
+               break;
        case IN_S_HANDLE_EPILOGUE:
                revoke_at_handle_epilogue(con);
                break;
index 658a6f2..d3a759e 100644 (file)
@@ -171,6 +171,13 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
        osd_data->num_bvecs = num_bvecs;
 }
 
+/*
+ * Turn @osd_data into an iterator-backed data descriptor; the iov_iter is
+ * copied by value.
+ */
+static void ceph_osd_iter_init(struct ceph_osd_data *osd_data,
+                              struct iov_iter *iter)
+{
+       osd_data->iter = *iter;
+       osd_data->type = CEPH_OSD_DATA_TYPE_ITER;
+}
+
 static struct ceph_osd_data *
 osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
 {
@@ -264,6 +271,22 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
 }
 EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
 
+/**
+ * osd_req_op_extent_osd_iter - attach an iterator buffer to an extent op
+ * @osd_req: request containing the operation
+ * @which: index of the operation within @osd_req
+ * @iter: iterator describing the buffer; copied by value
+ */
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+                               unsigned int which, struct iov_iter *iter)
+{
+       ceph_osd_iter_init(osd_req_op_data(osd_req, which, extent, osd_data),
+                          iter);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_iter);
+
 static void osd_req_op_cls_request_info_pagelist(
                        struct ceph_osd_request *osd_req,
                        unsigned int which, struct ceph_pagelist *pagelist)
@@ -346,6 +369,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
 #endif /* CONFIG_BLOCK */
        case CEPH_OSD_DATA_TYPE_BVECS:
                return osd_data->bvec_pos.iter.bi_size;
+       case CEPH_OSD_DATA_TYPE_ITER:
+               return iov_iter_count(&osd_data->iter);
        default:
                WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
                return 0;
@@ -376,8 +401,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
 
        switch (op->op) {
        case CEPH_OSD_OP_READ:
+       case CEPH_OSD_OP_SPARSE_READ:
        case CEPH_OSD_OP_WRITE:
        case CEPH_OSD_OP_WRITEFULL:
+               kfree(op->extent.sparse_ext);
                ceph_osd_data_release(&op->extent.osd_data);
                break;
        case CEPH_OSD_OP_CALL:
@@ -669,6 +696,7 @@ static void get_num_data_items(struct ceph_osd_request *req,
                /* reply */
                case CEPH_OSD_OP_STAT:
                case CEPH_OSD_OP_READ:
+               case CEPH_OSD_OP_SPARSE_READ:
                case CEPH_OSD_OP_LIST_WATCHERS:
                        *num_reply_data_items += 1;
                        break;
@@ -738,7 +766,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
 
        BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
               opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO &&
-              opcode != CEPH_OSD_OP_TRUNCATE);
+              opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ);
 
        op->extent.offset = offset;
        op->extent.length = length;
@@ -951,6 +979,8 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
 #endif
        } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
                ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
+       } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) {
+               ceph_msg_data_add_iter(msg, &osd_data->iter);
        } else {
                BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
        }
@@ -963,6 +993,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
        case CEPH_OSD_OP_STAT:
                break;
        case CEPH_OSD_OP_READ:
+       case CEPH_OSD_OP_SPARSE_READ:
        case CEPH_OSD_OP_WRITE:
        case CEPH_OSD_OP_WRITEFULL:
        case CEPH_OSD_OP_ZERO:
@@ -1017,6 +1048,10 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
                dst->copy_from.src_fadvise_flags =
                        cpu_to_le32(src->copy_from.src_fadvise_flags);
                break;
+       case CEPH_OSD_OP_ASSERT_VER:
+               dst->assert_ver.unused = cpu_to_le64(0);
+               dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver);
+               break;
        default:
                pr_err("unsupported osd opcode %s\n",
                        ceph_osd_op_name(src->op));
@@ -1059,7 +1094,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 
        BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
               opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
-              opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
+              opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE &&
+              opcode != CEPH_OSD_OP_SPARSE_READ);
 
        req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
                                        GFP_NOFS);
@@ -1100,15 +1136,30 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
        if (flags & CEPH_OSD_FLAG_WRITE)
                req->r_data_offset = off;
 
-       if (num_ops > 1)
+       if (num_ops > 1) {
+               int num_req_ops, num_rep_ops;
+
                /*
-                * This is a special case for ceph_writepages_start(), but it
-                * also covers ceph_uninline_data().  If more multi-op request
-                * use cases emerge, we will need a separate helper.
+                * If this is a multi-op write request, assume that we'll need
+                * request ops. If it's a multi-op read then assume we'll need
+                * reply ops. Anything else and call it -EINVAL.
                 */
-               r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
-       else
+               if (flags & CEPH_OSD_FLAG_WRITE) {
+                       num_req_ops = num_ops;
+                       num_rep_ops = 0;
+               } else if (flags & CEPH_OSD_FLAG_READ) {
+                       num_req_ops = 0;
+                       num_rep_ops = num_ops;
+               } else {
+                       r = -EINVAL;
+                       goto fail;
+               }
+
+               r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_req_ops,
+                                              num_rep_ops);
+       } else {
                r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+       }
        if (r)
                goto fail;
 
@@ -1120,6 +1171,18 @@ fail:
 }
 EXPORT_SYMBOL(ceph_osdc_new_request);
 
+/*
+ * Allocate the sparse extent array of an extent op.
+ *
+ * @op:  op whose extent.sparse_ext / extent.sparse_ext_cnt are set
+ * @cnt: number of array entries to allocate
+ *
+ * Returns 0 on success.  On allocation failure returns -ENOMEM and leaves
+ * extent.sparse_ext NULL with extent.sparse_ext_cnt 0, so the count never
+ * describes an array that does not exist.
+ */
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+{
+       op->extent.sparse_ext = kmalloc_array(cnt,
+                                             sizeof(*op->extent.sparse_ext),
+                                             GFP_NOFS);
+       if (!op->extent.sparse_ext) {
+               op->extent.sparse_ext_cnt = 0;
+               return -ENOMEM;
+       }
+
+       op->extent.sparse_ext_cnt = cnt;
+       return 0;
+}
+EXPORT_SYMBOL(__ceph_alloc_sparse_ext_map);
+
 /*
  * We keep osd requests in an rbtree, sorted by ->r_tid.
  */
@@ -1177,6 +1240,7 @@ static void osd_init(struct ceph_osd *osd)
 {
        refcount_set(&osd->o_ref, 1);
        RB_CLEAR_NODE(&osd->o_node);
+       spin_lock_init(&osd->o_requests_lock);
        osd->o_requests = RB_ROOT;
        osd->o_linger_requests = RB_ROOT;
        osd->o_backoff_mappings = RB_ROOT;
@@ -1187,6 +1251,13 @@ static void osd_init(struct ceph_osd *osd)
        mutex_init(&osd->lock);
 }
 
+/*
+ * Reset sparse-read state: free the extent array, zero the whole structure
+ * and set the state to CEPH_SPARSE_READ_HDR.
+ */
+static void ceph_init_sparse_read(struct ceph_sparse_read *sr)
+{
+       kfree(sr->sr_extent);
+
+       memset(sr, 0, sizeof(*sr));
+       sr->sr_state = CEPH_SPARSE_READ_HDR;
+}
+
 static void osd_cleanup(struct ceph_osd *osd)
 {
        WARN_ON(!RB_EMPTY_NODE(&osd->o_node));
@@ -1197,6 +1268,8 @@ static void osd_cleanup(struct ceph_osd *osd)
        WARN_ON(!list_empty(&osd->o_osd_lru));
        WARN_ON(!list_empty(&osd->o_keepalive_item));
 
+       ceph_init_sparse_read(&osd->o_sparse_read);
+
        if (osd->o_auth.authorizer) {
                WARN_ON(osd_homeless(osd));
                ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
@@ -1216,6 +1289,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
        osd_init(osd);
        osd->o_osdc = osdc;
        osd->o_osd = onum;
+       osd->o_sparse_op_idx = -1;
+
+       ceph_init_sparse_read(&osd->o_sparse_read);
 
        ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
 
@@ -1406,7 +1482,9 @@ static void link_request(struct ceph_osd *osd, struct ceph_osd_request *req)
                atomic_inc(&osd->o_osdc->num_homeless);
 
        get_osd(osd);
+       spin_lock(&osd->o_requests_lock);
        insert_request(&osd->o_requests, req);
+       spin_unlock(&osd->o_requests_lock);
        req->r_osd = osd;
 }
 
@@ -1418,7 +1496,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req)
             req, req->r_tid);
 
        req->r_osd = NULL;
+       spin_lock(&osd->o_requests_lock);
        erase_request(&osd->o_requests, req);
+       spin_unlock(&osd->o_requests_lock);
        put_osd(osd);
 
        if (!osd_homeless(osd))
@@ -2016,6 +2096,7 @@ static void setup_request_data(struct ceph_osd_request *req)
                                               &op->raw_data_in);
                        break;
                case CEPH_OSD_OP_READ:
+               case CEPH_OSD_OP_SPARSE_READ:
                        ceph_osdc_msg_data_add(reply_msg,
                                               &op->extent.osd_data);
                        break;
@@ -2435,8 +2516,10 @@ static void finish_request(struct ceph_osd_request *req)
 
        req->r_end_latency = ktime_get();
 
-       if (req->r_osd)
+       if (req->r_osd) {
+               ceph_init_sparse_read(&req->r_osd->o_sparse_read);
                unlink_request(req->r_osd, req);
+       }
        atomic_dec(&osdc->num_requests);
 
        /*
@@ -3795,6 +3878,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
         * one (type of) reply back.
         */
        WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK));
+       req->r_version = m.user_version;
        req->r_result = m.result ?: data_len;
        finish_request(req);
        mutex_unlock(&osd->lock);
@@ -5348,6 +5432,24 @@ static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
        ceph_msg_put(msg);
 }
 
+/* How much sparse data was requested? */
+static u64 sparse_data_requested(struct ceph_osd_request *req)
+{
+       u64 len = 0;
+
+       if (req->r_flags & CEPH_OSD_FLAG_READ) {
+               int i;
+
+               for (i = 0; i < req->r_num_ops; ++i) {
+                       struct ceph_osd_req_op *op = &req->r_ops[i];
+
+                       if (op->op == CEPH_OSD_OP_SPARSE_READ)
+                               len += op->extent.length;
+               }
+       }
+       return len;
+}
+
 /*
  * Lookup and return message for incoming reply.  Don't try to do
  * anything about a larger than preallocated data portion of the
@@ -5364,6 +5466,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
        int front_len = le32_to_cpu(hdr->front_len);
        int data_len = le32_to_cpu(hdr->data_len);
        u64 tid = le64_to_cpu(hdr->tid);
+       u64 srlen;
 
        down_read(&osdc->lock);
        if (!osd_registered(osd)) {
@@ -5396,7 +5499,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
                req->r_reply = m;
        }
 
-       if (data_len > req->r_reply->data_length) {
+       srlen = sparse_data_requested(req);
+       if (!srlen && data_len > req->r_reply->data_length) {
                pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
                        __func__, osd->o_osd, req->r_tid, data_len,
                        req->r_reply->data_length);
@@ -5406,6 +5510,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
        }
 
        m = ceph_msg_get(req->r_reply);
+       m->sparse_read = (bool)srlen;
+
        dout("get_reply tid %lld %p\n", tid, m);
 
 out_unlock_session:
@@ -5638,9 +5744,217 @@ static int osd_check_message_signature(struct ceph_msg *msg)
        return ceph_auth_check_message_signature(auth, msg);
 }
 
+static void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len,
+                          bool zero)
+{
+       while (len) {
+               struct page *page;
+               size_t poff, plen;
+
+               page = ceph_msg_data_next(cursor, &poff, &plen);
+               if (plen > len)
+                       plen = len;
+               if (zero)
+                       zero_user_segment(page, poff, poff + plen);
+               len -= plen;
+               ceph_msg_data_advance(cursor, plen);
+       }
+}
+
+static int prep_next_sparse_read(struct ceph_connection *con,
+                                struct ceph_msg_data_cursor *cursor)
+{
+       struct ceph_osd *o = con->private;
+       struct ceph_sparse_read *sr = &o->o_sparse_read;
+       struct ceph_osd_request *req;
+       struct ceph_osd_req_op *op;
+
+       spin_lock(&o->o_requests_lock);
+       req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid));
+       if (!req) {
+               spin_unlock(&o->o_requests_lock);
+               return -EBADR;
+       }
+
+       if (o->o_sparse_op_idx < 0) {
+               u64 srlen = sparse_data_requested(req);
+
+               dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
+                    __func__, o->o_osd, srlen);
+               ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+       } else {
+               u64 end;
+
+               op = &req->r_ops[o->o_sparse_op_idx];
+
+               WARN_ON_ONCE(op->extent.sparse_ext);
+
+               /* hand back buffer we took earlier */
+               op->extent.sparse_ext = sr->sr_extent;
+               sr->sr_extent = NULL;
+               op->extent.sparse_ext_cnt = sr->sr_count;
+               sr->sr_ext_len = 0;
+               dout("%s: [%d] completed extent array len %d cursor->resid %zd\n",
+                    __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid);
+               /* Advance to end of data for this operation */
+               end = ceph_sparse_ext_map_end(op);
+               if (end < sr->sr_req_len)
+                       advance_cursor(cursor, sr->sr_req_len - end, false);
+       }
+
+       ceph_init_sparse_read(sr);
+
+       /* find next op in this request (if any) */
+       while (++o->o_sparse_op_idx < req->r_num_ops) {
+               op = &req->r_ops[o->o_sparse_op_idx];
+               if (op->op == CEPH_OSD_OP_SPARSE_READ)
+                       goto found;
+       }
+
+       /* reset for next sparse read request */
+       spin_unlock(&o->o_requests_lock);
+       o->o_sparse_op_idx = -1;
+       return 0;
+found:
+       sr->sr_req_off = op->extent.offset;
+       sr->sr_req_len = op->extent.length;
+       sr->sr_pos = sr->sr_req_off;
+       dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__,
+            o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len);
+
+       /* hand off request's sparse extent map buffer */
+       sr->sr_ext_len = op->extent.sparse_ext_cnt;
+       op->extent.sparse_ext_cnt = 0;
+       sr->sr_extent = op->extent.sparse_ext;
+       op->extent.sparse_ext = NULL;
+
+       spin_unlock(&o->o_requests_lock);
+       return 1;
+}
+
+#ifdef __BIG_ENDIAN
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+       int i;
+
+       for (i = 0; i < sr->sr_count; i++) {
+               struct ceph_sparse_extent *ext = &sr->sr_extent[i];
+
+               ext->off = le64_to_cpu((__force __le64)ext->off);
+               ext->len = le64_to_cpu((__force __le64)ext->len);
+       }
+}
+#else
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+}
+#endif
+
+#define MAX_EXTENTS 4096
+
+static int osd_sparse_read(struct ceph_connection *con,
+                          struct ceph_msg_data_cursor *cursor,
+                          char **pbuf)
+{
+       struct ceph_osd *o = con->private;
+       struct ceph_sparse_read *sr = &o->o_sparse_read;
+       u32 count = sr->sr_count;
+       u64 eoff, elen;
+       int ret;
+
+       switch (sr->sr_state) {
+       case CEPH_SPARSE_READ_HDR:
+next_op:
+               ret = prep_next_sparse_read(con, cursor);
+               if (ret <= 0)
+                       return ret;
+
+               /* number of extents */
+               ret = sizeof(sr->sr_count);
+               *pbuf = (char *)&sr->sr_count;
+               sr->sr_state = CEPH_SPARSE_READ_EXTENTS;
+               break;
+       case CEPH_SPARSE_READ_EXTENTS:
+               /* Convert sr_count to host-endian */
+               count = le32_to_cpu((__force __le32)sr->sr_count);
+               sr->sr_count = count;
+               dout("[%d] got %u extents\n", o->o_osd, count);
+
+               if (count > 0) {
+                       if (!sr->sr_extent || count > sr->sr_ext_len) {
+                               /*
+                                * Apply a hard cap to the number of extents.
+                                * If we have more, assume something is wrong.
+                                */
+                               if (count > MAX_EXTENTS) {
+                                       dout("%s: OSD returned 0x%x extents in a single reply!\n",
+                                            __func__, count);
+                                       return -EREMOTEIO;
+                               }
+
+                               /* no extent array provided, or too short */
+                               kfree(sr->sr_extent);
+                               sr->sr_extent = kmalloc_array(count,
+                                                             sizeof(*sr->sr_extent),
+                                                             GFP_NOIO);
+                               if (!sr->sr_extent)
+                                       return -ENOMEM;
+                               sr->sr_ext_len = count;
+                       }
+                       ret = count * sizeof(*sr->sr_extent);
+                       *pbuf = (char *)sr->sr_extent;
+                       sr->sr_state = CEPH_SPARSE_READ_DATA_LEN;
+                       break;
+               }
+               /* No extents? Read data len */
+               fallthrough;
+       case CEPH_SPARSE_READ_DATA_LEN:
+               convert_extent_map(sr);
+               ret = sizeof(sr->sr_datalen);
+               *pbuf = (char *)&sr->sr_datalen;
+               sr->sr_state = CEPH_SPARSE_READ_DATA;
+               break;
+       case CEPH_SPARSE_READ_DATA:
+               if (sr->sr_index >= count) {
+                       sr->sr_state = CEPH_SPARSE_READ_HDR;
+                       goto next_op;
+               }
+
+               eoff = sr->sr_extent[sr->sr_index].off;
+               elen = sr->sr_extent[sr->sr_index].len;
+
+               dout("[%d] ext %d off 0x%llx len 0x%llx\n",
+                    o->o_osd, sr->sr_index, eoff, elen);
+
+               if (elen > INT_MAX) {
+                       dout("Sparse read extent length too long (0x%llx)\n",
+                            elen);
+                       return -EREMOTEIO;
+               }
+
+               /* zero out anything from sr_pos to start of extent */
+               if (sr->sr_pos < eoff)
+                       advance_cursor(cursor, eoff - sr->sr_pos, true);
+
+               /* Set position to end of extent */
+               sr->sr_pos = eoff + elen;
+
+               /* send back the new length and nullify the ptr */
+               cursor->sr_resid = elen;
+               ret = elen;
+               *pbuf = NULL;
+
+               /* Bump the array index */
+               ++sr->sr_index;
+               break;
+       }
+       return ret;
+}
+
 static const struct ceph_connection_operations osd_con_ops = {
        .get = osd_get_con,
        .put = osd_put_con,
+       .sparse_read = osd_sparse_read,
        .alloc_msg = osd_alloc_msg,
        .dispatch = osd_dispatch,
        .fault = osd_fault,
index 89d15ce..b3b3af0 100644 (file)
@@ -1831,8 +1831,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
 
        memset(&keys, 0, sizeof(keys));
        __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
-                          &keys, NULL, 0, 0, 0,
-                          FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+                          &keys, NULL, 0, 0, 0, 0);
 
        return __flow_hash_from_keys(&keys, &hashrnd);
 }
index 4570705..4eaf7ed 100644 (file)
@@ -550,7 +550,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
                             bool *pfmemalloc)
 {
        bool ret_pfmemalloc = false;
-       unsigned int obj_size;
+       size_t obj_size;
        void *obj;
 
        obj_size = SKB_HEAD_ALIGN(*size);
@@ -567,7 +567,13 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
                obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
                goto out;
        }
-       *size = obj_size = kmalloc_size_roundup(obj_size);
+
+       obj_size = kmalloc_size_roundup(obj_size);
+       /* The following cast might truncate high-order bits of obj_size, this
+        * is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
+        */
+       *size = (unsigned int)obj_size;
+
        /*
         * Try a regular allocation, when that fails and we're not entitled
         * to the reserves, fail.
@@ -4423,21 +4429,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
        struct sk_buff *segs = NULL;
        struct sk_buff *tail = NULL;
        struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
-       skb_frag_t *frag = skb_shinfo(head_skb)->frags;
        unsigned int mss = skb_shinfo(head_skb)->gso_size;
        unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
-       struct sk_buff *frag_skb = head_skb;
        unsigned int offset = doffset;
        unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
        unsigned int partial_segs = 0;
        unsigned int headroom;
        unsigned int len = head_skb->len;
+       struct sk_buff *frag_skb;
+       skb_frag_t *frag;
        __be16 proto;
        bool csum, sg;
-       int nfrags = skb_shinfo(head_skb)->nr_frags;
        int err = -ENOMEM;
        int i = 0;
-       int pos;
+       int nfrags, pos;
 
        if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
            mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
@@ -4514,6 +4519,13 @@ normal:
        headroom = skb_headroom(head_skb);
        pos = skb_headlen(head_skb);
 
+       if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+               return ERR_PTR(-ENOMEM);
+
+       nfrags = skb_shinfo(head_skb)->nr_frags;
+       frag = skb_shinfo(head_skb)->frags;
+       frag_skb = head_skb;
+
        do {
                struct sk_buff *nskb;
                skb_frag_t *nskb_frag;
@@ -4534,6 +4546,10 @@ normal:
                    (skb_headlen(list_skb) == len || sg)) {
                        BUG_ON(skb_headlen(list_skb) > len);
 
+                       nskb = skb_clone(list_skb, GFP_ATOMIC);
+                       if (unlikely(!nskb))
+                               goto err;
+
                        i = 0;
                        nfrags = skb_shinfo(list_skb)->nr_frags;
                        frag = skb_shinfo(list_skb)->frags;
@@ -4552,12 +4568,8 @@ normal:
                                frag++;
                        }
 
-                       nskb = skb_clone(list_skb, GFP_ATOMIC);
                        list_skb = list_skb->next;
 
-                       if (unlikely(!nskb))
-                               goto err;
-
                        if (unlikely(pskb_trim(nskb, len))) {
                                kfree_skb(nskb);
                                goto err;
@@ -4633,12 +4645,16 @@ normal:
                skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
                                           SKBFL_SHARED_FRAG;
 
-               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-                   skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+               if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
                        goto err;
 
                while (pos < offset + len) {
                        if (i >= nfrags) {
+                               if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+                                   skb_zerocopy_clone(nskb, list_skb,
+                                                      GFP_ATOMIC))
+                                       goto err;
+
                                i = 0;
                                nfrags = skb_shinfo(list_skb)->nr_frags;
                                frag = skb_shinfo(list_skb)->frags;
@@ -4652,10 +4668,6 @@ normal:
                                        i--;
                                        frag--;
                                }
-                               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-                                   skb_zerocopy_clone(nskb, frag_skb,
-                                                      GFP_ATOMIC))
-                                       goto err;
 
                                list_skb = list_skb->next;
                        }
@@ -5207,7 +5219,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
        serr->ee.ee_info = tstype;
        serr->opt_stats = opt_stats;
        serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
-       if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
+       if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
                serr->ee.ee_data = skb_shinfo(skb)->tskey;
                if (sk_is_tcp(sk))
                        serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
@@ -5263,21 +5275,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 {
        struct sk_buff *skb;
        bool tsonly, opt_stats = false;
+       u32 tsflags;
 
        if (!sk)
                return;
 
-       if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+       tsflags = READ_ONCE(sk->sk_tsflags);
+       if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
            skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
                return;
 
-       tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+       tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
        if (!skb_may_tx_timestamp(sk, tsonly))
                return;
 
        if (tsonly) {
 #ifdef CONFIG_INET
-               if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+               if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
                    sk_is_tcp(sk)) {
                        skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
                                                             ack_skb);
index a0659fc..6c31eef 100644 (file)
@@ -612,12 +612,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
                               u32 off, u32 len, bool ingress)
 {
+       int err = 0;
+
        if (!ingress) {
                if (!sock_writeable(psock->sk))
                        return -EAGAIN;
                return skb_send_sock(psock->sk, skb, off, len);
        }
-       return sk_psock_skb_ingress(psock, skb, off, len);
+       skb_get(skb);
+       err = sk_psock_skb_ingress(psock, skb, off, len);
+       if (err < 0)
+               kfree_skb(skb);
+       return err;
 }
 
 static void sk_psock_skb_state(struct sk_psock *psock,
@@ -685,9 +691,7 @@ static void sk_psock_backlog(struct work_struct *work)
                } while (len);
 
                skb = skb_dequeue(&psock->ingress_skb);
-               if (!ingress) {
-                       kfree_skb(skb);
-               }
+               kfree_skb(skb);
        }
 end:
        mutex_unlock(&psock->work_mutex);
index 666a17c..16584e2 100644 (file)
@@ -765,7 +765,8 @@ bool sk_mc_loop(struct sock *sk)
                return false;
        if (!sk)
                return true;
-       switch (sk->sk_family) {
+       /* IPV6_ADDRFORM can change sk->sk_family under us. */
+       switch (READ_ONCE(sk->sk_family)) {
        case AF_INET:
                return inet_test_bit(MC_LOOP, sk);
 #if IS_ENABLED(CONFIG_IPV6)
@@ -893,7 +894,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
        if (!match)
                return -EINVAL;
 
-       sk->sk_bind_phc = phc_index;
+       WRITE_ONCE(sk->sk_bind_phc, phc_index);
 
        return 0;
 }
@@ -936,7 +937,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
                        return ret;
        }
 
-       sk->sk_tsflags = val;
+       WRITE_ONCE(sk->sk_tsflags, val);
        sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
 
        if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
@@ -1044,7 +1045,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
                mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
                return -ENOMEM;
        }
-       sk->sk_forward_alloc += pages << PAGE_SHIFT;
+       sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
 
        WRITE_ONCE(sk->sk_reserved_mem,
                   sk->sk_reserved_mem + (pages << PAGE_SHIFT));
@@ -1718,8 +1719,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
 
        case SO_TIMESTAMPING_OLD:
                lv = sizeof(v.timestamping);
-               v.timestamping.flags = sk->sk_tsflags;
-               v.timestamping.bind_phc = sk->sk_bind_phc;
+               v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+               v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
                break;
 
        case SO_RCVTIMEO_OLD:
@@ -2746,9 +2747,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
                        break;
-               if (sk->sk_shutdown & SEND_SHUTDOWN)
+               if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
                        break;
-               if (sk->sk_err)
+               if (READ_ONCE(sk->sk_err))
                        break;
                timeo = schedule_timeout(timeo);
        }
@@ -2776,7 +2777,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
                        goto failure;
 
                err = -EPIPE;
-               if (sk->sk_shutdown & SEND_SHUTDOWN)
+               if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
                        goto failure;
 
                if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
@@ -3138,10 +3139,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 {
        int ret, amt = sk_mem_pages(size);
 
-       sk->sk_forward_alloc += amt << PAGE_SHIFT;
+       sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
        ret = __sk_mem_raise_allocated(sk, size, amt, kind);
        if (!ret)
-               sk->sk_forward_alloc -= amt << PAGE_SHIFT;
+               sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
        return ret;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -3173,7 +3174,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
 void __sk_mem_reclaim(struct sock *sk, int amount)
 {
        amount >>= PAGE_SHIFT;
-       sk->sk_forward_alloc -= amount << PAGE_SHIFT;
+       sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
        __sk_mem_reduce_allocated(sk, amount);
 }
 EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -3742,7 +3743,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
        mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
        mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
        mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
-       mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+       mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
        mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
        mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
        mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
index 8f07fea..cb11750 100644 (file)
@@ -18,7 +18,7 @@ struct bpf_stab {
        struct bpf_map map;
        struct sock **sks;
        struct sk_psock_progs progs;
-       raw_spinlock_t lock;
+       spinlock_t lock;
 };
 
 #define SOCK_CREATE_FLAG_MASK                          \
@@ -44,7 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
                return ERR_PTR(-ENOMEM);
 
        bpf_map_init_from_attr(&stab->map, attr);
-       raw_spin_lock_init(&stab->lock);
+       spin_lock_init(&stab->lock);
 
        stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
                                       sizeof(struct sock *),
@@ -411,7 +411,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
        struct sock *sk;
        int err = 0;
 
-       raw_spin_lock_bh(&stab->lock);
+       spin_lock_bh(&stab->lock);
        sk = *psk;
        if (!sk_test || sk_test == sk)
                sk = xchg(psk, NULL);
@@ -421,7 +421,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
        else
                err = -EINVAL;
 
-       raw_spin_unlock_bh(&stab->lock);
+       spin_unlock_bh(&stab->lock);
        return err;
 }
 
@@ -487,7 +487,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
        psock = sk_psock(sk);
        WARN_ON_ONCE(!psock);
 
-       raw_spin_lock_bh(&stab->lock);
+       spin_lock_bh(&stab->lock);
        osk = stab->sks[idx];
        if (osk && flags == BPF_NOEXIST) {
                ret = -EEXIST;
@@ -501,10 +501,10 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
        stab->sks[idx] = sk;
        if (osk)
                sock_map_unref(osk, &stab->sks[idx]);
-       raw_spin_unlock_bh(&stab->lock);
+       spin_unlock_bh(&stab->lock);
        return 0;
 out_unlock:
-       raw_spin_unlock_bh(&stab->lock);
+       spin_unlock_bh(&stab->lock);
        if (psock)
                sk_psock_put(sk, psock);
 out_free:
@@ -835,7 +835,7 @@ struct bpf_shtab_elem {
 
 struct bpf_shtab_bucket {
        struct hlist_head head;
-       raw_spinlock_t lock;
+       spinlock_t lock;
 };
 
 struct bpf_shtab {
@@ -910,7 +910,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
         * is okay since it's going away only after RCU grace period.
         * However, we need to check whether it's still present.
         */
-       raw_spin_lock_bh(&bucket->lock);
+       spin_lock_bh(&bucket->lock);
        elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
                                               elem->key, map->key_size);
        if (elem_probe && elem_probe == elem) {
@@ -918,7 +918,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
                sock_map_unref(elem->sk, elem);
                sock_hash_free_elem(htab, elem);
        }
-       raw_spin_unlock_bh(&bucket->lock);
+       spin_unlock_bh(&bucket->lock);
 }
 
 static long sock_hash_delete_elem(struct bpf_map *map, void *key)
@@ -932,7 +932,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
        hash = sock_hash_bucket_hash(key, key_size);
        bucket = sock_hash_select_bucket(htab, hash);
 
-       raw_spin_lock_bh(&bucket->lock);
+       spin_lock_bh(&bucket->lock);
        elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
        if (elem) {
                hlist_del_rcu(&elem->node);
@@ -940,7 +940,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
                sock_hash_free_elem(htab, elem);
                ret = 0;
        }
-       raw_spin_unlock_bh(&bucket->lock);
+       spin_unlock_bh(&bucket->lock);
        return ret;
 }
 
@@ -1000,7 +1000,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
        hash = sock_hash_bucket_hash(key, key_size);
        bucket = sock_hash_select_bucket(htab, hash);
 
-       raw_spin_lock_bh(&bucket->lock);
+       spin_lock_bh(&bucket->lock);
        elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
        if (elem && flags == BPF_NOEXIST) {
                ret = -EEXIST;
@@ -1026,10 +1026,10 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
                sock_map_unref(elem->sk, elem);
                sock_hash_free_elem(htab, elem);
        }
-       raw_spin_unlock_bh(&bucket->lock);
+       spin_unlock_bh(&bucket->lock);
        return 0;
 out_unlock:
-       raw_spin_unlock_bh(&bucket->lock);
+       spin_unlock_bh(&bucket->lock);
        sk_psock_put(sk, psock);
 out_free:
        sk_psock_free_link(link);
@@ -1115,7 +1115,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
 
        for (i = 0; i < htab->buckets_num; i++) {
                INIT_HLIST_HEAD(&htab->buckets[i].head);
-               raw_spin_lock_init(&htab->buckets[i].lock);
+               spin_lock_init(&htab->buckets[i].lock);
        }
 
        return &htab->map;
@@ -1147,11 +1147,11 @@ static void sock_hash_free(struct bpf_map *map)
                 * exists, psock exists and holds a ref to socket. That
                 * lets us to grab a socket ref too.
                 */
-               raw_spin_lock_bh(&bucket->lock);
+               spin_lock_bh(&bucket->lock);
                hlist_for_each_entry(elem, &bucket->head, node)
                        sock_hold(elem->sk);
                hlist_move_list(&bucket->head, &unlink_list);
-               raw_spin_unlock_bh(&bucket->lock);
+               spin_unlock_bh(&bucket->lock);
 
                /* Process removed entries out of atomic context to
                 * block for socket lock before deleting the psock's
index 1086653..d0bc1dd 100644 (file)
@@ -157,26 +157,24 @@ out_status:
 int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
 {
        struct net *net = sock_net(skb->sk);
-       struct handshake_req *req = NULL;
-       struct socket *sock = NULL;
+       struct handshake_req *req;
+       struct socket *sock;
        int fd, status, err;
 
        if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
                return -EINVAL;
        fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
 
-       err = 0;
        sock = sockfd_lookup(fd, &err);
-       if (err) {
-               err = -EBADF;
-               goto out_status;
-       }
+       if (!sock)
+               return err;
 
        req = handshake_req_hash_lookup(sock->sk);
        if (!req) {
                err = -EBUSY;
+               trace_handshake_cmd_done_err(net, req, sock->sk, err);
                fput(sock->file);
-               goto out_status;
+               return err;
        }
 
        trace_handshake_cmd_done(net, req, sock->sk, fd);
@@ -188,10 +186,6 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
        handshake_complete(req, status, info);
        fput(sock->file);
        return 0;
-
-out_status:
-       trace_handshake_cmd_done_err(net, req, sock->sk, err);
-       return err;
 }
 
 static unsigned int handshake_net_id;
index 65ba18a..eafa4a0 100644 (file)
@@ -278,7 +278,8 @@ void fib_release_info(struct fib_info *fi)
                                hlist_del(&nexthop_nh->nh_hash);
                        } endfor_nexthops(fi)
                }
-               fi->fib_dead = 1;
+               /* Paired with READ_ONCE() from fib_table_lookup() */
+               WRITE_ONCE(fi->fib_dead, 1);
                fib_info_put(fi);
        }
        spin_unlock_bh(&fib_info_lock);
@@ -1581,6 +1582,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 link_it:
        ofi = fib_find_info(fi);
        if (ofi) {
+               /* fib_table_lookup() should not see @fi yet. */
                fi->fib_dead = 1;
                free_fib_info(fi);
                refcount_inc(&ofi->fib_treeref);
@@ -1619,6 +1621,7 @@ err_inval:
 
 failure:
        if (fi) {
+               /* fib_table_lookup() should not see @fi yet. */
                fi->fib_dead = 1;
                free_fib_info(fi);
        }
index 74d403d..d13fb9e 100644 (file)
@@ -1582,7 +1582,8 @@ found:
                if (fa->fa_dscp &&
                    inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
                        continue;
-               if (fi->fib_dead)
+               /* Paired with WRITE_ONCE() in fib_release_info() */
+               if (READ_ONCE(fi->fib_dead))
                        continue;
                if (fa->fa_info->fib_scope < flp->flowi4_scope)
                        continue;
index 0c9e768..418e5fb 100644 (file)
@@ -353,8 +353,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
        struct flowi4 fl4;
        int hlen = LL_RESERVED_SPACE(dev);
        int tlen = dev->needed_tailroom;
-       unsigned int size = mtu;
+       unsigned int size;
 
+       size = min(mtu, IP_MAX_MTU);
        while (1) {
                skb = alloc_skb(size + hlen + tlen,
                                GFP_ATOMIC | __GFP_NOWARN);
index e18931a..66fac12 100644 (file)
@@ -67,7 +67,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
        struct ip_options *opt  = &(IPCB(skb)->opt);
 
        __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
-       __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
 #ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
index fe9ead9..5e9c815 100644 (file)
@@ -584,7 +584,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
 static struct sk_buff *ip_extract_route_hint(const struct net *net,
                                             struct sk_buff *skb, int rt_type)
 {
-       if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+       if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
+           IPCB(skb)->flags & IPSKB_MULTIPATH)
                return NULL;
 
        return skb;
index 43ba4b7..4ab877c 100644 (file)
@@ -207,6 +207,9 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
        } else if (rt->rt_type == RTN_BROADCAST)
                IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
 
+       /* OUTOCTETS should be counted after fragment */
+       IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
+
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
                skb = skb_expand_head(skb, hh_len);
                if (!skb)
@@ -366,8 +369,6 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
        /*
         *      If the indicated interface is up and running, send the packet.
         */
-       IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
        skb->dev = dev;
        skb->protocol = htons(ETH_P_IP);
 
@@ -424,8 +425,6 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
 
-       IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
        skb->dev = dev;
        skb->protocol = htons(ETH_P_IP);
 
@@ -982,7 +981,7 @@ static int __ip_append_data(struct sock *sk,
        paged = !!cork->gso_size;
 
        if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
-           sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+           READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
                tskey = atomic_inc_return(&sk->sk_tskey) - 1;
 
        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
index d1c7366..cce9cb2 100644 (file)
@@ -511,7 +511,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
         * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
         */
        info = PKTINFO_SKB_CB(skb);
-       if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+       if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
            !info->ipi_ifindex)
                return false;
 
index 3f0c6d6..9e222a5 100644 (file)
@@ -1804,7 +1804,6 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
        struct ip_options *opt = &(IPCB(skb)->opt);
 
        IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
-       IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
        if (unlikely(opt->optlen))
                ip_forward_options(skb);
index d8c99bd..66f419e 100644 (file)
@@ -2144,6 +2144,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
                int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
 
                fib_select_multipath(res, h);
+               IPCB(skb)->flags |= IPSKB_MULTIPATH;
        }
 #endif
 
index b155948..0c3040a 100644 (file)
@@ -2256,14 +2256,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
                        }
                }
 
-               if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+               if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
                        has_timestamping = true;
                else
                        tss->ts[0] = (struct timespec64) {0};
        }
 
        if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
-               if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+               if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
                        has_timestamping = true;
                else
                        tss->ts[2] = (struct timespec64) {0};
index e6b4fbd..ccfc8bb 100644 (file)
@@ -3474,7 +3474,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
        if (delta <= 0)
                return;
        amt = sk_mem_pages(delta);
-       sk->sk_forward_alloc += amt << PAGE_SHIFT;
+       sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
        sk_memory_allocated_add(sk, amt);
 
        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
index 0794a2c..f39b9c8 100644 (file)
@@ -1414,9 +1414,9 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
                spin_lock(&sk_queue->lock);
 
 
-       sk->sk_forward_alloc += size;
+       sk_forward_alloc_add(sk, size);
        amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
-       sk->sk_forward_alloc -= amt;
+       sk_forward_alloc_add(sk, -amt);
 
        if (amt)
                __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
@@ -1527,7 +1527,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
                goto uncharge_drop;
        }
 
-       sk->sk_forward_alloc -= size;
+       sk_forward_alloc_add(sk, -size);
 
        /* no need to setup a destructor, we will explicitly release the
         * forward allocated memory on dequeue
index 967913a..0b6ee96 100644 (file)
@@ -1378,7 +1378,7 @@ retry:
         * idev->desync_factor if it's larger
         */
        cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
-       max_desync_factor = min_t(__u32,
+       max_desync_factor = min_t(long,
                                  idev->cnf.max_desync_factor,
                                  cnf_temp_preferred_lft - regen_advance);
 
index d94041b..b837881 100644 (file)
@@ -99,7 +99,8 @@ static bool ip6_can_use_hint(const struct sk_buff *skb,
 static struct sk_buff *ip6_extract_route_hint(const struct net *net,
                                              struct sk_buff *skb)
 {
-       if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
+       if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) ||
+           IP6CB(skb)->flags & IP6SKB_MULTIPATH)
                return NULL;
 
        return skb;
index 0665e8b..54fc4c7 100644 (file)
@@ -451,7 +451,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
        struct dst_entry *dst = skb_dst(skb);
 
        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
-       __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 
 #ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
@@ -1502,7 +1501,7 @@ static int __ip6_append_data(struct sock *sk,
        orig_mtu = mtu;
 
        if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
-           sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+           READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
                tskey = atomic_inc_return(&sk->sk_tskey) - 1;
 
        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
index 67a3b8f..30ca064 100644 (file)
@@ -2010,8 +2010,6 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
 {
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_OUTFORWDATAGRAMS);
-       IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
-                     IPSTATS_MIB_OUTOCTETS, skb->len);
        return dst_output(net, sk, skb);
 }
 
index 1b27728..5831aaa 100644 (file)
@@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                return -EINVAL;
 
        ipcm6_init_sk(&ipc6, np);
-       ipc6.sockc.tsflags = sk->sk_tsflags;
+       ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
        ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
 
        fl6.flowi6_oif = oif;
index 0eae766..42fcec3 100644 (file)
@@ -772,7 +772,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        fl6.flowi6_uid = sk->sk_uid;
 
        ipcm6_init(&ipc6);
-       ipc6.sockc.tsflags = sk->sk_tsflags;
+       ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
        ipc6.sockc.mark = fl6.flowi6_mark;
 
        if (sin6) {
index d15a9e3..9c687b3 100644 (file)
@@ -423,6 +423,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
        if (match->nh && have_oif_match && res->nh)
                return;
 
+       if (skb)
+               IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
+
        /* We might have already computed the hash for ICMPv6 errors. In such
         * case it will always be non-zero. Otherwise now is the time to do it.
         */
index ebc6ae4..86b5d50 100644 (file)
@@ -1339,7 +1339,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        ipcm6_init(&ipc6);
        ipc6.gso_size = READ_ONCE(up->gso_size);
-       ipc6.sockc.tsflags = sk->sk_tsflags;
+       ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
        ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
 
        /* destination address check */
index 393f01b..4580f61 100644 (file)
@@ -1859,6 +1859,8 @@ static __net_exit void kcm_exit_net(struct net *net)
         * that all multiplexors and psocks have been destroyed.
         */
        WARN_ON(!list_empty(&knet->mux_list));
+
+       mutex_destroy(&knet->mutex);
 }
 
 static struct pernet_operations kcm_net_ops = {
index 933b257..a7fc16f 100644 (file)
@@ -134,9 +134,15 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
        __kfree_skb(skb);
 }
 
+static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
+{
+       WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
+                  mptcp_sk(sk)->rmem_fwd_alloc + size);
+}
+
 static void mptcp_rmem_charge(struct sock *sk, int size)
 {
-       mptcp_sk(sk)->rmem_fwd_alloc -= size;
+       mptcp_rmem_fwd_alloc_add(sk, -size);
 }
 
 static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
@@ -177,7 +183,7 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
 static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
 {
        amount >>= PAGE_SHIFT;
-       mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
+       mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
        __sk_mem_reduce_allocated(sk, amount);
 }
 
@@ -186,7 +192,7 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size)
        struct mptcp_sock *msk = mptcp_sk(sk);
        int reclaimable;
 
-       msk->rmem_fwd_alloc += size;
+       mptcp_rmem_fwd_alloc_add(sk, size);
        reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
 
        /* see sk_mem_uncharge() for the rationale behind the following schema */
@@ -341,7 +347,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
        if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
                return false;
 
-       msk->rmem_fwd_alloc += amount;
+       mptcp_rmem_fwd_alloc_add(sk, amount);
        return true;
 }
 
@@ -1800,7 +1806,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                }
 
                /* data successfully copied into the write queue */
-               sk->sk_forward_alloc -= total_ts;
+               sk_forward_alloc_add(sk, -total_ts);
                copied += psize;
                dfrag->data_len += psize;
                frag_truesize += psize;
@@ -3257,8 +3263,8 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
        /* move all the rx fwd alloc into the sk_mem_reclaim_final in
         * inet_sock_destruct() will dispose it
         */
-       sk->sk_forward_alloc += msk->rmem_fwd_alloc;
-       msk->rmem_fwd_alloc = 0;
+       sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
+       WRITE_ONCE(msk->rmem_fwd_alloc, 0);
        mptcp_token_destroy(msk);
        mptcp_pm_free_anno_list(msk);
        mptcp_free_local_addr_list(msk);
@@ -3522,7 +3528,8 @@ static void mptcp_shutdown(struct sock *sk, int how)
 
 static int mptcp_forward_alloc_get(const struct sock *sk)
 {
-       return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+       return READ_ONCE(sk->sk_forward_alloc) +
+              READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
 }
 
 static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
index 005a7ce..bf4f91b 100644 (file)
@@ -36,6 +36,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net");
 #define IP_SET_HASH_WITH_PROTO
 #define IP_SET_HASH_WITH_NETS
 #define IPSET_NET_COUNT 2
+#define IP_SET_HASH_WITH_NET0
 
 /* IPv4 variant */
 
index 41b826d..e429ebb 100644 (file)
@@ -102,6 +102,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
        [NFT_MSG_NEWFLOWTABLE]  = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
        [NFT_MSG_GETFLOWTABLE]  = AUDIT_NFT_OP_INVALID,
        [NFT_MSG_DELFLOWTABLE]  = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+       [NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET,
 };
 
 static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state)
@@ -3421,6 +3422,18 @@ err:
        nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
 }
 
+static void audit_log_rule_reset(const struct nft_table *table,
+                                unsigned int base_seq,
+                                unsigned int nentries)
+{
+       char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
+                             table->name, base_seq);
+
+       audit_log_nfcfg(buf, table->family, nentries,
+                       AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+       kfree(buf);
+}
+
 struct nft_rule_dump_ctx {
        char *table;
        char *chain;
@@ -3467,6 +3480,10 @@ cont:
 cont_skip:
                (*idx)++;
        }
+
+       if (reset && *idx)
+               audit_log_rule_reset(table, cb->seq, *idx);
+
        return 0;
 }
 
@@ -3634,6 +3651,9 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
        if (err < 0)
                goto err_fill_rule_info;
 
+       if (reset)
+               audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1);
+
        return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
 
 err_fill_rule_info:
@@ -5624,13 +5644,25 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
        return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
 }
 
+static void audit_log_nft_set_reset(const struct nft_table *table,
+                                   unsigned int base_seq,
+                                   unsigned int nentries)
+{
+       char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
+
+       audit_log_nfcfg(buf, table->family, nentries,
+                       AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC);
+       kfree(buf);
+}
+
 struct nft_set_dump_ctx {
        const struct nft_set    *set;
        struct nft_ctx          ctx;
 };
 
 static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
-                                const struct nft_set *set, bool reset)
+                                const struct nft_set *set, bool reset,
+                                unsigned int base_seq)
 {
        struct nft_set_elem_catchall *catchall;
        u8 genmask = nft_genmask_cur(net);
@@ -5646,6 +5678,8 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
 
                elem.priv = catchall->elem;
                ret = nf_tables_fill_setelem(skb, set, &elem, reset);
+               if (reset && !ret)
+                       audit_log_nft_set_reset(set->table, base_seq, 1);
                break;
        }
 
@@ -5725,12 +5759,17 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
        set->ops->walk(&dump_ctx->ctx, set, &args.iter);
 
        if (!args.iter.err && args.iter.count == cb->args[0])
-               args.iter.err = nft_set_catchall_dump(net, skb, set, reset);
-       rcu_read_unlock();
-
+               args.iter.err = nft_set_catchall_dump(net, skb, set,
+                                                     reset, cb->seq);
        nla_nest_end(skb, nest);
        nlmsg_end(skb, nlh);
 
+       if (reset && args.iter.count > args.iter.skip)
+               audit_log_nft_set_reset(table, cb->seq,
+                                       args.iter.count - args.iter.skip);
+
+       rcu_read_unlock();
+
        if (args.iter.err && args.iter.err != -EMSGSIZE)
                return args.iter.err;
        if (args.iter.count == cb->args[0])
@@ -5955,13 +5994,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
        struct netlink_ext_ack *extack = info->extack;
        u8 genmask = nft_genmask_cur(info->net);
        u8 family = info->nfmsg->nfgen_family;
+       int rem, err = 0, nelems = 0;
        struct net *net = info->net;
        struct nft_table *table;
        struct nft_set *set;
        struct nlattr *attr;
        struct nft_ctx ctx;
        bool reset = false;
-       int rem, err = 0;
 
        table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
                                 genmask, 0);
@@ -6004,8 +6043,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
                        NL_SET_BAD_ATTR(extack, attr);
                        break;
                }
+               nelems++;
        }
 
+       if (reset)
+               audit_log_nft_set_reset(table, nft_pernet(net)->base_seq,
+                                       nelems);
+
        return err;
 }
 
index 8f1bfa6..50723ba 100644 (file)
@@ -315,6 +315,14 @@ static int nfnl_osf_add_callback(struct sk_buff *skb,
 
        f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
 
+       if (f->opt_num > ARRAY_SIZE(f->opt))
+               return -EINVAL;
+
+       if (!memchr(f->genre, 0, MAXGENRELEN) ||
+           !memchr(f->subtype, 0, MAXGENRELEN) ||
+           !memchr(f->version, 0, MAXGENRELEN))
+               return -EINVAL;
+
        kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
        if (!kf)
                return -ENOMEM;
index 7f856ce..3fbaa7b 100644 (file)
@@ -35,6 +35,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
                return opt[offset + 1];
 }
 
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+       if (len % NFT_REG32_SIZE)
+               dest[len / NFT_REG32_SIZE] = 0;
+
+       return skb_copy_bits(skb, offset, dest, len);
+}
+
 static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
                                 struct nft_regs *regs,
                                 const struct nft_pktinfo *pkt)
@@ -56,8 +64,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
        }
        offset += priv->offset;
 
-       dest[priv->len / NFT_REG32_SIZE] = 0;
-       if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+       if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
                goto err;
        return;
 err:
@@ -153,8 +160,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
        }
        offset += priv->offset;
 
-       dest[priv->len / NFT_REG32_SIZE] = 0;
-       if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+       if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
                goto err;
        return;
 err:
@@ -210,7 +216,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
                if (priv->flags & NFT_EXTHDR_F_PRESENT) {
                        *dest = 1;
                } else {
-                       dest[priv->len / NFT_REG32_SIZE] = 0;
+                       if (priv->len % NFT_REG32_SIZE)
+                               dest[priv->len / NFT_REG32_SIZE] = 0;
                        memcpy(dest, opt + offset, priv->len);
                }
 
@@ -238,7 +245,12 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
        if (!tcph)
                goto err;
 
+       if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+               goto err;
+
+       tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
        opt = (u8 *)tcph;
+
        for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
                union {
                        __be16 v16;
@@ -253,15 +265,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
                if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
                        goto err;
 
-               if (skb_ensure_writable(pkt->skb,
-                                       nft_thoff(pkt) + i + priv->len))
-                       goto err;
-
-               tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
-                                             &tcphdr_len);
-               if (!tcph)
-                       goto err;
-
                offset = i + priv->offset;
 
                switch (priv->len) {
@@ -325,9 +328,9 @@ static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
        if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
                goto drop;
 
-       opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
-       if (!opt)
-               goto err;
+       tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
+       opt = (u8 *)tcph;
+
        for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
                unsigned int j;
 
@@ -392,9 +395,8 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
                            offset + ntohs(sch->length) > pkt->skb->len)
                                break;
 
-                       dest[priv->len / NFT_REG32_SIZE] = 0;
-                       if (skb_copy_bits(pkt->skb, offset + priv->offset,
-                                         dest, priv->len) < 0)
+                       if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
+                                               dest, priv->len) < 0)
                                break;
                        return;
                }
index c6435e7..f250b53 100644 (file)
@@ -312,6 +312,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
        struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
        struct rb_node *node, *next, *parent, **p, *first = NULL;
        struct nft_rbtree *priv = nft_set_priv(set);
+       u8 cur_genmask = nft_genmask_cur(net);
        u8 genmask = nft_genmask_next(net);
        int d, err;
 
@@ -357,8 +358,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
                if (!nft_set_elem_active(&rbe->ext, genmask))
                        continue;
 
-               /* perform garbage collection to avoid bogus overlap reports. */
-               if (nft_set_elem_expired(&rbe->ext)) {
+               /* perform garbage collection to avoid bogus overlap reports
+                * but skip new elements in this transaction.
+                */
+               if (nft_set_elem_expired(&rbe->ext) &&
+                   nft_set_elem_active(&rbe->ext, cur_genmask)) {
                        err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
                        if (err < 0)
                                return err;
index e896109..b46a6a5 100644 (file)
@@ -149,6 +149,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par)
 {
        const struct xt_sctp_info *info = par->matchinfo;
 
+       if (info->flag_count > ARRAY_SIZE(info->flag_info))
+               return -EINVAL;
        if (info->flags & ~XT_SCTP_VALID_FLAGS)
                return -EINVAL;
        if (info->invflags & ~XT_SCTP_VALID_FLAGS)
index 177b40d..117d461 100644 (file)
@@ -96,11 +96,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
        return ret ^ data->invert;
 }
 
+static int u32_mt_checkentry(const struct xt_mtchk_param *par)
+{
+       const struct xt_u32 *data = par->matchinfo;
+       const struct xt_u32_test *ct;
+       unsigned int i;
+
+       if (data->ntests > ARRAY_SIZE(data->tests))
+               return -EINVAL;
+
+       for (i = 0; i < data->ntests; ++i) {
+               ct = &data->tests[i];
+
+               if (ct->nnums > ARRAY_SIZE(ct->location) ||
+                   ct->nvalues > ARRAY_SIZE(ct->value))
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 static struct xt_match xt_u32_mt_reg __read_mostly = {
        .name       = "u32",
        .revision   = 0,
        .family     = NFPROTO_UNSPEC,
        .match      = u32_mt,
+       .checkentry = u32_mt_checkentry,
        .matchsize  = sizeof(struct xt_u32),
        .me         = THIS_MODULE,
 };
index 591d87d..68e6acd 100644 (file)
@@ -61,6 +61,7 @@ struct fq_pie_sched_data {
        struct pie_params p_params;
        u32 ecn_prob;
        u32 flows_cnt;
+       u32 flows_cursor;
        u32 quantum;
        u32 memory_limit;
        u32 new_flow_count;
@@ -375,22 +376,32 @@ flow_error:
 static void fq_pie_timer(struct timer_list *t)
 {
        struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
+       unsigned long next, tupdate;
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock; /* to lock qdisc for probability calculations */
-       u32 idx;
+       int max_cnt, i;
 
        rcu_read_lock();
        root_lock = qdisc_lock(qdisc_root_sleeping(sch));
        spin_lock(root_lock);
 
-       for (idx = 0; idx < q->flows_cnt; idx++)
-               pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
-                                         q->flows[idx].backlog);
-
-       /* reset the timer to fire after 'tupdate' jiffies. */
-       if (q->p_params.tupdate)
-               mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
+       /* Limit this expensive loop to 2048 flows per round. */
+       max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
+       for (i = 0; i < max_cnt; i++) {
+               pie_calculate_probability(&q->p_params,
+                                         &q->flows[q->flows_cursor].vars,
+                                         q->flows[q->flows_cursor].backlog);
+               q->flows_cursor++;
+       }
 
+       tupdate = q->p_params.tupdate;
+       next = 0;
+       if (q->flows_cursor >= q->flows_cnt) {
+               q->flows_cursor = 0;
+               next = tupdate;
+       }
+       if (tupdate)
+               mod_timer(&q->adapt_timer, jiffies + next);
        spin_unlock(root_lock);
        rcu_read_unlock();
 }
index ea8c4a7..35f49ed 100644 (file)
@@ -207,7 +207,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
        .priv_size   =       sizeof(struct plug_sched_data),
        .enqueue     =       plug_enqueue,
        .dequeue     =       plug_dequeue,
-       .peek        =       qdisc_peek_head,
+       .peek        =       qdisc_peek_dequeued,
        .init        =       plug_init,
        .change      =       plug_change,
        .reset       =       qdisc_reset_queue,
index 1a25752..546c10a 100644 (file)
@@ -974,10 +974,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
 }
 
 /* Dequeue head packet of the head class in the DRR queue of the aggregate. */
-static void agg_dequeue(struct qfq_aggregate *agg,
-                       struct qfq_class *cl, unsigned int len)
+static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
+                                  struct qfq_class *cl, unsigned int len)
 {
-       qdisc_dequeue_peeked(cl->qdisc);
+       struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
+
+       if (!skb)
+               return NULL;
 
        cl->deficit -= (int) len;
 
@@ -987,6 +990,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
                cl->deficit += agg->lmax;
                list_move_tail(&cl->alist, &agg->active);
        }
+
+       return skb;
 }
 
 static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
@@ -1132,11 +1137,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
        if (!skb)
                return NULL;
 
-       qdisc_qstats_backlog_dec(sch, skb);
        sch->q.qlen--;
+
+       skb = agg_dequeue(in_serv_agg, cl, len);
+
+       if (!skb) {
+               sch->q.qlen++;
+               return NULL;
+       }
+
+       qdisc_qstats_backlog_dec(sch, skb);
        qdisc_bstats_update(sch, skb);
 
-       agg_dequeue(in_serv_agg, cl, len);
        /* If lmax is lowered, through qfq_change_class, for a class
         * owning pending packets with larger size than the new value
         * of lmax, then the following condition may hold.
index f13d6a3..ec00ee7 100644 (file)
@@ -282,7 +282,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
                assoc->init_retries, assoc->shutdown_retries,
                assoc->rtx_data_chunks,
                refcount_read(&sk->sk_wmem_alloc),
-               sk->sk_wmem_queued,
+               READ_ONCE(sk->sk_wmem_queued),
                sk->sk_sndbuf,
                sk->sk_rcvbuf);
        seq_printf(seq, "\n");
index fd0631e..ab943e8 100644 (file)
@@ -69,7 +69,7 @@
 #include <net/sctp/stream_sched.h>
 
 /* Forward declarations for internal helper functions. */
-static bool sctp_writeable(struct sock *sk);
+static bool sctp_writeable(const struct sock *sk);
 static void sctp_wfree(struct sk_buff *skb);
 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
                                size_t msg_len);
@@ -140,7 +140,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
 
        refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
        asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
-       sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
+       sk_wmem_queued_add(sk, chunk->skb->truesize + sizeof(struct sctp_chunk));
        sk_mem_charge(sk, chunk->skb->truesize);
 }
 
@@ -9144,7 +9144,7 @@ static void sctp_wfree(struct sk_buff *skb)
        struct sock *sk = asoc->base.sk;
 
        sk_mem_uncharge(sk, skb->truesize);
-       sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
+       sk_wmem_queued_add(sk, -(skb->truesize + sizeof(struct sctp_chunk)));
        asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
        WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
                                      &sk->sk_wmem_alloc));
@@ -9299,9 +9299,9 @@ void sctp_write_space(struct sock *sk)
  * UDP-style sockets or TCP-style sockets, this code should work.
  *  - Daisy
  */
-static bool sctp_writeable(struct sock *sk)
+static bool sctp_writeable(const struct sock *sk)
 {
-       return sk->sk_sndbuf > sk->sk_wmem_queued;
+       return READ_ONCE(sk->sk_sndbuf) > READ_ONCE(sk->sk_wmem_queued);
 }
 
 /* Wait for an association to go into ESTABLISHED state. If timeout is 0,
index 77f2832..c8b08b3 100644 (file)
@@ -827,7 +827,7 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
 
 static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
 {
-       bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
+       bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
        struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
        struct net_device *orig_dev;
        ktime_t hwtstamp;
@@ -879,12 +879,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
        int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
        int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
        struct scm_timestamping_internal tss;
-
        int empty = 1, false_tstamp = 0;
        struct skb_shared_hwtstamps *shhwtstamps =
                skb_hwtstamps(skb);
        int if_index;
        ktime_t hwtstamp;
+       u32 tsflags;
 
        /* Race occurred between timestamp enabling and packet
           receiving.  Fill in the current time for now. */
@@ -926,11 +926,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
        }
 
        memset(&tss, 0, sizeof(tss));
-       if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+       tsflags = READ_ONCE(sk->sk_tsflags);
+       if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
            ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
                empty = 0;
        if (shhwtstamps &&
-           (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+           (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
            !skb_is_swtx_tstamp(skb, false_tstamp)) {
                if_index = 0;
                if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
@@ -938,14 +939,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
                else
                        hwtstamp = shhwtstamps->hwtstamp;
 
-               if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+               if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
                        hwtstamp = ptp_convert_timestamp(&hwtstamp,
-                                                        sk->sk_bind_phc);
+                                                        READ_ONCE(sk->sk_bind_phc));
 
                if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
                        empty = 0;
 
-                       if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+                       if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
                            !skb_is_err_queue(skb))
                                put_ts_pktinfo(msg, skb, if_index);
                }
index 86930a8..3e8a04a 100644 (file)
@@ -680,7 +680,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
         *        What the above comment does talk about? --ANK(980817)
         */
 
-       if (unix_tot_inflight)
+       if (READ_ONCE(unix_tot_inflight))
                unix_gc();              /* Garbage collect fds */
 }
 
index e9dde71..6ff628f 100644 (file)
@@ -64,7 +64,7 @@ void unix_inflight(struct user_struct *user, struct file *fp)
                /* Paired with READ_ONCE() in wait_for_unix_gc() */
                WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
        }
-       user->unix_inflight++;
+       WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
        spin_unlock(&unix_gc_lock);
 }
 
@@ -85,7 +85,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
                /* Paired with READ_ONCE() in wait_for_unix_gc() */
                WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
        }
-       user->unix_inflight--;
+       WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
        spin_unlock(&unix_gc_lock);
 }
 
@@ -99,7 +99,7 @@ static inline bool too_many_unix_fds(struct task_struct *p)
 {
        struct user_struct *user = current_user();
 
-       if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+       if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
                return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
        return false;
 }
index fcfc847..55f8b9b 100644 (file)
@@ -602,7 +602,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 
        for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
                if (unlikely(i >= MAX_SKB_FRAGS))
-                       return ERR_PTR(-EFAULT);
+                       return ERR_PTR(-EOVERFLOW);
 
                page = pool->umem->pgs[addr >> PAGE_SHIFT];
                get_page(page);
@@ -655,15 +655,17 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
                        skb_put(skb, len);
 
                        err = skb_store_bits(skb, 0, buffer, len);
-                       if (unlikely(err))
+                       if (unlikely(err)) {
+                               kfree_skb(skb);
                                goto free_err;
+                       }
                } else {
                        int nr_frags = skb_shinfo(skb)->nr_frags;
                        struct page *page;
                        u8 *vaddr;
 
                        if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
-                               err = -EFAULT;
+                               err = -EOVERFLOW;
                                goto free_err;
                        }
 
@@ -690,12 +692,14 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
        return skb;
 
 free_err:
-       if (err == -EAGAIN) {
-               xsk_cq_cancel_locked(xs, 1);
-       } else {
-               xsk_set_destructor_arg(skb);
-               xsk_drop_skb(skb);
+       if (err == -EOVERFLOW) {
+               /* Drop the packet */
+               xsk_set_destructor_arg(xs->skb);
+               xsk_drop_skb(xs->skb);
                xskq_cons_release(xs->tx);
+       } else {
+               /* Let application retry */
+               xsk_cq_cancel_locked(xs, 1);
        }
 
        return ERR_PTR(err);
@@ -738,7 +742,7 @@ static int __xsk_generic_xmit(struct sock *sk)
                skb = xsk_build_skb(xs, &desc);
                if (IS_ERR(skb)) {
                        err = PTR_ERR(skb);
-                       if (err == -EAGAIN)
+                       if (err != -EOVERFLOW)
                                goto out;
                        err = 0;
                        continue;
index c014217..22b36c8 100644 (file)
@@ -111,6 +111,9 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
        sock_diag_save_cookie(sk, msg->xdiag_cookie);
 
        mutex_lock(&xs->mutex);
+       if (READ_ONCE(xs->state) == XSK_UNBOUND)
+               goto out_nlmsg_trim;
+
        if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
                goto out_nlmsg_trim;
 
index 40cd13e..2fe6f28 100644 (file)
@@ -6,7 +6,79 @@
 # They are independent, and can be combined like W=12 or W=123e.
 # ==========================================================================
 
-KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
+# Default set of warnings, always enabled
+KBUILD_CFLAGS += -Wall
+KBUILD_CFLAGS += -Wundef
+KBUILD_CFLAGS += -Werror=implicit-function-declaration
+KBUILD_CFLAGS += -Werror=implicit-int
+KBUILD_CFLAGS += -Werror=return-type
+KBUILD_CFLAGS += -Werror=strict-prototypes
+KBUILD_CFLAGS += -Wno-format-security
+KBUILD_CFLAGS += -Wno-trigraphs
+KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+
+ifneq ($(CONFIG_FRAME_WARN),0)
+KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN)
+endif
+
+KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
+KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
+KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
+
+ifdef CONFIG_CC_IS_CLANG
+# The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable.
+KBUILD_CFLAGS += -Wno-gnu
+else
+
+# gcc inanely warns about local variables called 'main'
+KBUILD_CFLAGS += -Wno-main
+endif
+
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+
+# These result in bogus false positives
+KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
+
+# Variable Length Arrays (VLAs) should not be used anywhere in the kernel
+KBUILD_CFLAGS += -Wvla
+
+# disable pointer signed / unsigned warnings in gcc 4.0
+KBUILD_CFLAGS += -Wno-pointer-sign
+
+# In order to make sure new function cast mismatches are not introduced
+# in the kernel (to avoid tripping CFI checking), the kernel should be
+# globally built with -Wcast-function-type.
+KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type)
+
+# The allocators already balk at large sizes, so silence the compiler
+# warnings for bounds checks involving those possible values. While
+# -Wno-alloc-size-larger-than would normally be used here, earlier versions
+# of gcc (<9.1) weirdly don't handle the option correctly when _other_
+# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX
+# doesn't work (as it is documented to), silently resolving to "0" prior to
+# version 9.1 (and producing an error more recently). Numeric values larger
+# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently
+# ignored, continuing to default to PTRDIFF_MAX. So, left with no other
+# choice, we must perform a versioned check to disable this warning.
+# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au
+KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
+KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
+
+# Prohibit date/time macros, which would make the build non-deterministic
+KBUILD_CFLAGS += -Werror=date-time
+
+# enforce correct pointer usage
+KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types)
+
+# Require designated initializers for all marked structures
+KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
+
+# Warn if there is an enum types mismatch
+KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion)
 
 # backward compatibility
 KBUILD_EXTRA_WARN ?= $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)
@@ -24,6 +96,7 @@ ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),)
 
 KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
 KBUILD_CFLAGS += -Wmissing-declarations
+KBUILD_CFLAGS += $(call cc-option, -Wrestrict)
 KBUILD_CFLAGS += -Wmissing-format-attribute
 KBUILD_CFLAGS += -Wmissing-prototypes
 KBUILD_CFLAGS += -Wold-style-definition
@@ -31,12 +104,10 @@ KBUILD_CFLAGS += -Wmissing-include-dirs
 KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
 KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
 KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
+KBUILD_CFLAGS += $(call cc-option, -Wformat-overflow)
+KBUILD_CFLAGS += $(call cc-option, -Wformat-truncation)
+KBUILD_CFLAGS += $(call cc-option, -Wstringop-overflow)
 KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
-# The following turn off the warnings enabled by -Wextra
-KBUILD_CFLAGS += -Wno-missing-field-initializers
-KBUILD_CFLAGS += -Wno-sign-compare
-KBUILD_CFLAGS += -Wno-type-limits
-KBUILD_CFLAGS += -Wno-shift-negative-value
 
 KBUILD_CPPFLAGS += -Wundef
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
@@ -45,9 +116,16 @@ else
 
 # Some diagnostics enabled by default are noisy.
 # Suppress them by using -Wno... except for W=1.
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
+KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
 
 ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Wno-initializer-overrides
 # Clang before clang-16 would warn on default argument promotions.
 ifneq ($(call clang-min-version, 160000),y)
 # Disable -Wformat
@@ -61,7 +139,6 @@ ifeq ($(call clang-min-version, 120000),y)
 KBUILD_CFLAGS += -Wformat-insufficient-args
 endif
 endif
-KBUILD_CFLAGS += -Wno-sign-compare
 KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
 KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
 KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
@@ -83,8 +160,25 @@ KBUILD_CFLAGS += -Wtype-limits
 KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
 KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
 
+ifdef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += -Winitializer-overrides
+endif
+
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
 
+else
+
+# The following turn off the warnings enabled by -Wextra
+KBUILD_CFLAGS += -Wno-missing-field-initializers
+KBUILD_CFLAGS += -Wno-type-limits
+KBUILD_CFLAGS += -Wno-shift-negative-value
+
+ifdef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += -Wno-initializer-overrides
+else
+KBUILD_CFLAGS += -Wno-maybe-uninitialized
+endif
+
 endif
 
 #
@@ -106,6 +200,11 @@ KBUILD_CFLAGS += $(call cc-option, -Wpacked-bitfield-compat)
 
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN3
 
+else
+
+# The following turn off the warnings enabled by -Wextra
+KBUILD_CFLAGS += -Wno-sign-compare
+
 endif
 
 #
index ab0c5bd..c59cc57 100644 (file)
@@ -9,6 +9,35 @@ __modinst:
 include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
 
+install-y :=
+
+ifeq ($(KBUILD_EXTMOD)$(sign-only),)
+
+# remove the old directory and symlink
+$(shell rm -fr $(MODLIB)/kernel $(MODLIB)/build)
+
+install-$(CONFIG_MODULES) += $(addprefix $(MODLIB)/, build modules.order)
+
+$(MODLIB)/build: FORCE
+       $(call cmd,symlink)
+
+quiet_cmd_symlink = SYMLINK $@
+      cmd_symlink = ln -s $(CURDIR) $@
+
+$(MODLIB)/modules.order: modules.order FORCE
+       $(call cmd,install_modorder)
+
+quiet_cmd_install_modorder = INSTALL $@
+      cmd_install_modorder = sed 's:^\(.*\)\.o$$:kernel/\1.ko:' $< > $@
+
+# Install modules.builtin(.modinfo) even when CONFIG_MODULES is disabled.
+install-y += $(addprefix $(MODLIB)/, modules.builtin modules.builtin.modinfo)
+
+$(addprefix $(MODLIB)/, modules.builtin modules.builtin.modinfo): $(MODLIB)/%: % FORCE
+       $(call cmd,install)
+
+endif
+
 modules := $(call read-file, $(MODORDER))
 
 ifeq ($(KBUILD_EXTMOD),)
@@ -27,15 +56,16 @@ suffix-$(CONFIG_MODULE_COMPRESS_XZ) := .xz
 suffix-$(CONFIG_MODULE_COMPRESS_ZSTD)  := .zst
 
 modules := $(patsubst $(extmod_prefix)%.o, $(dst)/%.ko$(suffix-y), $(modules))
+install-$(CONFIG_MODULES) += $(modules)
 
-__modinst: $(modules)
+__modinst: $(install-y)
        @:
 
 #
 # Installation
 #
 quiet_cmd_install = INSTALL $@
-      cmd_install = mkdir -p $(dir $@); cp $< $@
+      cmd_install = cp $< $@
 
 # Strip
 #
@@ -65,7 +95,6 @@ endif
 # Signing
 # Don't stop modules_install even if we can't sign external modules.
 #
-ifeq ($(CONFIG_MODULE_SIG_ALL),y)
 ifeq ($(filter pkcs11:%, $(CONFIG_MODULE_SIG_KEY)),)
 sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(srctree)/)$(CONFIG_MODULE_SIG_KEY)
 else
@@ -74,18 +103,34 @@ endif
 quiet_cmd_sign = SIGN    $@
       cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" certs/signing_key.x509 $@ \
                  $(if $(KBUILD_EXTMOD),|| true)
-else
+
+ifeq ($(sign-only),)
+
+# During modules_install, modules are signed only when CONFIG_MODULE_SIG_ALL=y.
+ifndef CONFIG_MODULE_SIG_ALL
 quiet_cmd_sign :=
       cmd_sign := :
 endif
 
-ifeq ($(modules_sign_only),)
+# Create necessary directories
+$(shell mkdir -p $(sort $(dir $(install-y))))
 
 $(dst)/%.ko: $(extmod_prefix)%.ko FORCE
        $(call cmd,install)
        $(call cmd,strip)
        $(call cmd,sign)
 
+ifdef CONFIG_MODULES
+__modinst: depmod
+
+PHONY += depmod
+depmod: $(install-y)
+       $(call cmd,depmod)
+
+quiet_cmd_depmod = DEPMOD  $(MODLIB)
+      cmd_depmod = $(srctree)/scripts/depmod.sh $(KERNELRELEASE)
+endif
+
 else
 
 $(dst)/%.ko: FORCE
index 39472e8..739402f 100644 (file)
@@ -41,6 +41,7 @@ include $(srctree)/scripts/Kbuild.include
 MODPOST = scripts/mod/modpost
 
 modpost-args =                                                                         \
+       $(if $(CONFIG_MODULES),-M)                                                      \
        $(if $(CONFIG_MODVERSIONS),-m)                                                  \
        $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a)                                        \
        $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E)                                  \
index 92dbc88..2bcab02 100644 (file)
@@ -11,7 +11,6 @@ TAR_CONTENT := Documentation LICENSES arch block certs crypto drivers fs \
                samples scripts security sound tools usr virt \
                .config Makefile \
                Kbuild Kconfig COPYING $(wildcard localversion*)
-MKSPEC     := $(srctree)/scripts/package/mkspec
 
 quiet_cmd_src_tar = TAR     $(2).tar.gz
       cmd_src_tar = \
@@ -66,30 +65,38 @@ $(linux-tarballs): archive-args = --prefix=linux/ $$(cat $<)
 $(linux-tarballs): .tmp_HEAD FORCE
        $(call if_changed,archive)
 
-# rpm-pkg
+# rpm-pkg srcrpm-pkg binrpm-pkg
 # ---------------------------------------------------------------------------
-PHONY += rpm-pkg
-rpm-pkg: srpm = $(shell rpmspec --srpm --query --queryformat='%{name}-%{VERSION}-%{RELEASE}.src.rpm' kernel.spec)
-rpm-pkg: srcrpm-pkg
-       +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -rb $(srpm) \
-       --define='_smp_mflags %{nil}'
 
-# srcrpm-pkg
-# ---------------------------------------------------------------------------
-PHONY += srcrpm-pkg
-srcrpm-pkg: linux.tar.gz
-       $(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec
-       +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -bs kernel.spec \
-       --define='_smp_mflags %{nil}' --define='_sourcedir rpmbuild/SOURCES' --define='_srcrpmdir .'
+quiet_cmd_mkspec = GEN     $@
+      cmd_mkspec = $(srctree)/scripts/package/mkspec > $@
 
-# binrpm-pkg
-# ---------------------------------------------------------------------------
-PHONY += binrpm-pkg
-binrpm-pkg:
-       $(MAKE) -f $(srctree)/Makefile
-       $(CONFIG_SHELL) $(MKSPEC) prebuilt > $(objtree)/binkernel.spec
-       +rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \
-               $(UTS_MACHINE)-linux -bb $(objtree)/binkernel.spec
+kernel.spec: FORCE
+       $(call cmd,mkspec)
+
+PHONY += rpm-sources
+rpm-sources: linux.tar.gz
+       $(Q)mkdir -p rpmbuild/SOURCES
+       $(Q)ln -f linux.tar.gz rpmbuild/SOURCES/linux.tar.gz
+       $(Q)cp $(KCONFIG_CONFIG) rpmbuild/SOURCES/config
+       $(Q)$(srctree)/scripts/package/gen-diff-patch rpmbuild/SOURCES/diff.patch
+
+PHONY += rpm-pkg srcrpm-pkg binrpm-pkg
+
+rpm-pkg:    private build-type := a
+srcrpm-pkg: private build-type := s
+binrpm-pkg: private build-type := b
+
+rpm-pkg srcrpm-pkg: rpm-sources
+rpm-pkg srcrpm-pkg binrpm-pkg: kernel.spec
+       +$(strip rpmbuild -b$(build-type) kernel.spec \
+       --define='_topdir $(abspath rpmbuild)' \
+       $(if $(filter a b, $(build-type)), \
+               --target $(UTS_MACHINE)-linux --build-in-place --noprep --define='_smp_mflags %{nil}' \
+               $$(rpm -q rpm >/dev/null 2>&1 || echo --nodeps)) \
+       $(if $(filter b, $(build-type)), \
+               --without devel) \
+       $(RPMOPTS))
 
 # deb-pkg srcdeb-pkg bindeb-pkg
 # ---------------------------------------------------------------------------
@@ -141,14 +148,10 @@ deb-pkg srcdeb-pkg bindeb-pkg:
        $(if $(findstring source, $(build-type)), \
                --unsigned-source --compression=$(KDEB_SOURCE_COMPRESS)) \
        $(if $(findstring binary, $(build-type)), \
-               -r$(KBUILD_PKG_ROOTCMD) -a$$(cat debian/arch), \
+               --rules-file='$(MAKE) -f debian/rules' --jobs=1 -r$(KBUILD_PKG_ROOTCMD) -a$$(cat debian/arch), \
                --no-check-builddeps) \
        $(DPKG_FLAGS))
 
-PHONY += intdeb-pkg
-intdeb-pkg:
-       +$(CONFIG_SHELL) $(srctree)/scripts/package/builddeb
-
 # snap-pkg
 # ---------------------------------------------------------------------------
 PHONY += snap-pkg
index eaae2ce..61b7ddd 100755 (executable)
@@ -59,9 +59,9 @@ class Helper(APIElement):
         Break down helper function protocol into smaller chunks: return type,
         name, distincts arguments.
         """
-        arg_re = re.compile('((\w+ )*?(\w+|...))( (\**)(\w+))?$')
+        arg_re = re.compile(r'((\w+ )*?(\w+|...))( (\**)(\w+))?$')
         res = {}
-        proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
+        proto_re = re.compile(r'(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
 
         capture = proto_re.match(self.proto)
         res['ret_type'] = capture.group(1)
@@ -114,11 +114,11 @@ class HeaderParser(object):
         return Helper(proto=proto, desc=desc, ret=ret)
 
     def parse_symbol(self):
-        p = re.compile(' \* ?(BPF\w+)$')
+        p = re.compile(r' \* ?(BPF\w+)$')
         capture = p.match(self.line)
         if not capture:
             raise NoSyscallCommandFound
-        end_re = re.compile(' \* ?NOTES$')
+        end_re = re.compile(r' \* ?NOTES$')
         end = end_re.match(self.line)
         if end:
             raise NoSyscallCommandFound
@@ -133,7 +133,7 @@ class HeaderParser(object):
         #   - Same as above, with "const" and/or "struct" in front of type
         #   - "..." (undefined number of arguments, for bpf_trace_printk())
         # There is at least one term ("void"), and at most five arguments.
-        p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
+        p = re.compile(r' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
         capture = p.match(self.line)
         if not capture:
             raise NoHelperFound
@@ -141,7 +141,7 @@ class HeaderParser(object):
         return capture.group(1)
 
     def parse_desc(self, proto):
-        p = re.compile(' \* ?(?:\t| {5,8})Description$')
+        p = re.compile(r' \* ?(?:\t| {5,8})Description$')
         capture = p.match(self.line)
         if not capture:
             raise Exception("No description section found for " + proto)
@@ -154,7 +154,7 @@ class HeaderParser(object):
             if self.line == ' *\n':
                 desc += '\n'
             else:
-                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
+                p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
                     desc_present = True
@@ -167,7 +167,7 @@ class HeaderParser(object):
         return desc
 
     def parse_ret(self, proto):
-        p = re.compile(' \* ?(?:\t| {5,8})Return$')
+        p = re.compile(r' \* ?(?:\t| {5,8})Return$')
         capture = p.match(self.line)
         if not capture:
             raise Exception("No return section found for " + proto)
@@ -180,7 +180,7 @@ class HeaderParser(object):
             if self.line == ' *\n':
                 ret += '\n'
             else:
-                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
+                p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
                     ret_present = True
@@ -219,12 +219,12 @@ class HeaderParser(object):
         self.seek_to('enum bpf_cmd {',
                      'Could not find start of bpf_cmd enum', 0)
         # Searches for either one or more BPF\w+ enums
-        bpf_p = re.compile('\s*(BPF\w+)+')
+        bpf_p = re.compile(r'\s*(BPF\w+)+')
         # Searches for an enum entry assigned to another entry,
         # for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is
         # not documented hence should be skipped in check to
         # determine if the right number of syscalls are documented
-        assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
+        assign_p = re.compile(r'\s*(BPF\w+)\s*=\s*(BPF\w+)')
         bpf_cmd_str = ''
         while True:
             capture = assign_p.match(self.line)
@@ -239,7 +239,7 @@ class HeaderParser(object):
                 break
             self.line = self.reader.readline()
         # Find the number of occurences of BPF\w+
-        self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
+        self.enum_syscalls = re.findall(r'(BPF\w+)+', bpf_cmd_str)
 
     def parse_desc_helpers(self):
         self.seek_to(helpersDocStart,
@@ -263,7 +263,7 @@ class HeaderParser(object):
         self.seek_to('#define ___BPF_FUNC_MAPPER(FN, ctx...)',
                      'Could not find start of eBPF helper definition list')
         # Searches for one FN(\w+) define or a backslash for newline
-        p = re.compile('\s*FN\((\w+), (\d+), ##ctx\)|\\\\')
+        p = re.compile(r'\s*FN\((\w+), (\d+), ##ctx\)|\\\\')
         fn_defines_str = ''
         i = 0
         while True:
@@ -278,7 +278,7 @@ class HeaderParser(object):
                 break
             self.line = self.reader.readline()
         # Find the number of occurences of FN(\w+)
-        self.define_unique_helpers = re.findall('FN\(\w+, \d+, ##ctx\)', fn_defines_str)
+        self.define_unique_helpers = re.findall(r'FN\(\w+, \d+, ##ctx\)', fn_defines_str)
 
     def validate_helpers(self):
         last_helper = ''
@@ -425,7 +425,7 @@ class PrinterRST(Printer):
         try:
             cmd = ['git', 'log', '-1', '--pretty=format:%cs', '--no-patch',
                    '-L',
-                   '/{}/,/\*\//:include/uapi/linux/bpf.h'.format(delimiter)]
+                   '/{}/,/\\*\\//:include/uapi/linux/bpf.h'.format(delimiter)]
             date = subprocess.run(cmd, cwd=linuxRoot,
                                   capture_output=True, check=True)
             return date.stdout.decode().rstrip()
@@ -516,7 +516,7 @@ as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
 programs that are compatible with the GNU Privacy License (GPL).
 
 In order to use such helpers, the eBPF program must be loaded with the correct
-license string passed (via **attr**) to the **bpf**\ () system call, and this
+license string passed (via **attr**) to the **bpf**\\ () system call, and this
 generally translates into the C source code of the program containing a line
 similar to the following:
 
@@ -550,7 +550,7 @@ may be interested in:
 * The bpftool utility can be used to probe the availability of helper functions
   on the system (as well as supported program and map types, and a number of
   other parameters). To do so, run **bpftool feature probe** (see
-  **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to
+  **bpftool-feature**\\ (8) for details). Add the **unprivileged** keyword to
   list features available to unprivileged users.
 
 Compatibility between helper functions and program types can generally be found
@@ -562,23 +562,23 @@ other functions, themselves allowing access to additional helpers. The
 requirement for GPL license is also in those **struct bpf_func_proto**.
 
 Compatibility between helper functions and map types can be found in the
-**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*.
+**check_map_func_compatibility**\\ () function in file *kernel/bpf/verifier.c*.
 
 Helper functions that invalidate the checks on **data** and **data_end**
 pointers for network processing are listed in function
-**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*.
+**bpf_helper_changes_pkt_data**\\ () in file *net/core/filter.c*.
 
 SEE ALSO
 ========
 
-**bpf**\ (2),
-**bpftool**\ (8),
-**cgroups**\ (7),
-**ip**\ (8),
-**perf_event_open**\ (2),
-**sendmsg**\ (2),
-**socket**\ (7),
-**tc-bpf**\ (8)'''
+**bpf**\\ (2),
+**bpftool**\\ (8),
+**cgroups**\\ (7),
+**ip**\\ (8),
+**perf_event_open**\\ (2),
+**sendmsg**\\ (2),
+**socket**\\ (7),
+**tc-bpf**\\ (8)'''
         print(footer)
 
     def print_proto(self, helper):
@@ -598,7 +598,7 @@ SEE ALSO
             one_arg = '{}{}'.format(comma, a['type'])
             if a['name']:
                 if a['star']:
-                    one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*'))
+                    one_arg += ' {}**\\ '.format(a['star'].replace('*', '\\*'))
                 else:
                     one_arg += '** '
                 one_arg += '*{}*\\ **'.format(a['name'])
index 3643b4f..e22da27 100755 (executable)
@@ -1,14 +1,16 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 #
-# A depmod wrapper used by the toplevel Makefile
+# A depmod wrapper
 
-if test $# -ne 2; then
-       echo "Usage: $0 /sbin/depmod <kernelrelease>" >&2
+if test $# -ne 1; then
+       echo "Usage: $0 <kernelrelease>" >&2
        exit 1
 fi
-DEPMOD=$1
-KERNELRELEASE=$2
+
+KERNELRELEASE=$1
+
+: ${DEPMOD:=depmod}
 
 if ! test -r System.map ; then
        echo "Warning: modules_install: missing 'System.map' file. Skipping depmod." >&2
@@ -23,33 +25,8 @@ if [ -z $(command -v $DEPMOD) ]; then
        exit 0
 fi
 
-# older versions of depmod require the version string to start with three
-# numbers, so we cheat with a symlink here
-depmod_hack_needed=true
-tmp_dir=$(mktemp -d ${TMPDIR:-/tmp}/depmod.XXXXXX)
-mkdir -p "$tmp_dir/lib/modules/$KERNELRELEASE"
-if "$DEPMOD" -b "$tmp_dir" $KERNELRELEASE 2>/dev/null; then
-       if test -e "$tmp_dir/lib/modules/$KERNELRELEASE/modules.dep" -o \
-               -e "$tmp_dir/lib/modules/$KERNELRELEASE/modules.dep.bin"; then
-               depmod_hack_needed=false
-       fi
-fi
-rm -rf "$tmp_dir"
-if $depmod_hack_needed; then
-       symlink="$INSTALL_MOD_PATH/lib/modules/99.98.$KERNELRELEASE"
-       ln -s "$KERNELRELEASE" "$symlink"
-       KERNELRELEASE=99.98.$KERNELRELEASE
-fi
-
 set -- -ae -F System.map
 if test -n "$INSTALL_MOD_PATH"; then
        set -- "$@" -b "$INSTALL_MOD_PATH"
 fi
-"$DEPMOD" "$@" "$KERNELRELEASE"
-ret=$?
-
-if $depmod_hack_needed; then
-       rm -f "$symlink"
-fi
-
-exit $ret
+exec "$DEPMOD" "$@" "$KERNELRELEASE"
index 1db1889..07f6dc4 100755 (executable)
@@ -85,8 +85,7 @@ if arg_contain -S "$@"; then
        fi
 
        # For arch/powerpc/tools/gcc-check-mprofile-kernel.sh
-       if arg_contain -m64 "$@" && arg_contain -mlittle-endian "$@" &&
-               arg_contain -mprofile-kernel "$@"; then
+       if arg_contain -m64 "$@" && arg_contain -mprofile-kernel "$@"; then
                if ! test -t 0 && ! grep -q notrace; then
                        echo "_mcount"
                fi
index af1c961..4eee155 100644 (file)
@@ -93,11 +93,13 @@ endif
 %_defconfig: $(obj)/conf
        $(Q)$< $(silent) --defconfig=arch/$(SRCARCH)/configs/$@ $(Kconfig)
 
-configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/configs/$@)
+configfiles = $(wildcard $(srctree)/kernel/configs/$(1) $(srctree)/arch/$(SRCARCH)/configs/$(1))
+all-config-fragments = $(call configfiles,*.config)
+config-fragments = $(call configfiles,$@)
 
 %.config: $(obj)/conf
-       $(if $(call configfiles),, $(error No configuration exists for this target on this architecture))
-       $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles)
+       $(if $(config-fragments),, $(error $@ fragment does not exists on this architecture))
+       $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(config-fragments)
        $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
 
 PHONY += tinyconfig
@@ -115,6 +117,7 @@ clean-files += tests/.cache
 
 # Help text used by make help
 help:
+       @echo  'Configuration targets:'
        @echo  '  config          - Update current config utilising a line-oriented program'
        @echo  '  nconfig         - Update current config utilising a ncurses menu based program'
        @echo  '  menuconfig      - Update current config utilising a menu based program'
@@ -141,6 +144,12 @@ help:
        @echo  '                    default value without prompting'
        @echo  '  tinyconfig      - Configure the tiniest possible kernel'
        @echo  '  testconfig      - Run Kconfig unit tests (requires python3 and pytest)'
+       @echo  ''
+       @echo  'Configuration topic targets:'
+       @$(foreach f, $(all-config-fragments), \
+               if help=$$(grep -m1 '^# Help: ' $(f)); then \
+                       printf '  %-25s - %s\n' '$(notdir $(f))' "$${help#*: }"; \
+               fi;)
 
 # ===========================================================================
 # object files used by all kconfig flavours
index 992575f..4a6811d 100644 (file)
@@ -349,7 +349,11 @@ int conf_read_simple(const char *name, int def)
        char *p, *p2;
        struct symbol *sym;
        int i, def_flags;
+       const char *warn_unknown;
+       const char *werror;
 
+       warn_unknown = getenv("KCONFIG_WARN_UNKNOWN_SYMBOLS");
+       werror = getenv("KCONFIG_WERROR");
        if (name) {
                in = zconf_fopen(name);
        } else {
@@ -437,6 +441,10 @@ load:
                        if (def == S_DEF_USER) {
                                sym = sym_find(line + 2 + strlen(CONFIG_));
                                if (!sym) {
+                                       if (warn_unknown)
+                                               conf_warning("unknown symbol: %s",
+                                                            line + 2 + strlen(CONFIG_));
+
                                        conf_set_changed(true);
                                        continue;
                                }
@@ -471,7 +479,7 @@ load:
 
                        sym = sym_find(line + strlen(CONFIG_));
                        if (!sym) {
-                               if (def == S_DEF_AUTO)
+                               if (def == S_DEF_AUTO) {
                                        /*
                                         * Reading from include/config/auto.conf
                                         * If CONFIG_FOO previously existed in
@@ -479,8 +487,13 @@ load:
                                         * include/config/FOO must be touched.
                                         */
                                        conf_touch_dep(line + strlen(CONFIG_));
-                               else
+                               } else {
+                                       if (warn_unknown)
+                                               conf_warning("unknown symbol: %s",
+                                                            line + strlen(CONFIG_));
+
                                        conf_set_changed(true);
+                               }
                                continue;
                        }
 
@@ -519,6 +532,10 @@ load:
        }
        free(line);
        fclose(in);
+
+       if (conf_warnings && werror)
+               exit(1);
+
        return 0;
 }
 
index 9c9caca..4a9a23b 100644 (file)
@@ -275,7 +275,6 @@ struct jump_key {
        struct list_head entries;
        size_t offset;
        struct menu *target;
-       int index;
 };
 
 extern struct file *file_list;
index e7118d6..471a59a 100644 (file)
@@ -101,6 +101,7 @@ const char *menu_get_prompt(struct menu *menu);
 struct menu *menu_get_parent_menu(struct menu *menu);
 bool menu_has_help(struct menu *menu);
 const char *menu_get_help(struct menu *menu);
+int get_jump_key_char(void);
 struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head);
 void menu_get_ext_help(struct menu *menu, struct gstr *help);
 
index 347daf2..a501abf 100644 (file)
@@ -196,13 +196,9 @@ int first_alpha(const char *string, const char *exempt);
 int dialog_yesno(const char *title, const char *prompt, int height, int width);
 int dialog_msgbox(const char *title, const char *prompt, int height,
                  int width, int pause);
-
-
-typedef void (*update_text_fn)(char *buf, size_t start, size_t end, void
-                              *_data);
-int dialog_textbox(const char *title, char *tbuf, int initial_height,
-                  int initial_width, int *keys, int *_vscroll, int *_hscroll,
-                  update_text_fn update_text, void *data);
+int dialog_textbox(const char *title, const char *tbuf, int initial_height,
+                  int initial_width, int *_vscroll, int *_hscroll,
+                  int (*extra_key_cb)(int, size_t, size_t, void *), void *data);
 int dialog_menu(const char *title, const char *prompt,
                const void *selected, int *s_scroll);
 int dialog_checklist(const char *title, const char *prompt, int height,
index bc4d4fb..058ed0e 100644 (file)
@@ -10,8 +10,8 @@
 
 static int hscroll;
 static int begin_reached, end_reached, page_length;
-static char *buf;
-static char *page;
+static const char *buf, *page;
+static size_t start, end;
 
 /*
  * Go back 'n' lines in text. Called by dialog_textbox().
@@ -98,21 +98,10 @@ static void print_line(WINDOW *win, int row, int width)
 /*
  * Print a new page of text.
  */
-static void print_page(WINDOW *win, int height, int width, update_text_fn
-                      update_text, void *data)
+static void print_page(WINDOW *win, int height, int width)
 {
        int i, passed_end = 0;
 
-       if (update_text) {
-               char *end;
-
-               for (i = 0; i < height; i++)
-                       get_line();
-               end = page;
-               back_lines(height);
-               update_text(buf, page - buf, end - buf, data);
-       }
-
        page_length = 0;
        for (i = 0; i < height; i++) {
                print_line(win, i, width);
@@ -142,24 +131,26 @@ static void print_position(WINDOW *win)
  * refresh window content
  */
 static void refresh_text_box(WINDOW *dialog, WINDOW *box, int boxh, int boxw,
-                            int cur_y, int cur_x, update_text_fn update_text,
-                            void *data)
+                            int cur_y, int cur_x)
 {
-       print_page(box, boxh, boxw, update_text, data);
+       start = page - buf;
+
+       print_page(box, boxh, boxw);
        print_position(dialog);
        wmove(dialog, cur_y, cur_x);    /* Restore cursor position */
        wrefresh(dialog);
+
+       end = page - buf;
 }
 
 /*
  * Display text from a file in a dialog box.
  *
  * keys is a null-terminated array
- * update_text() may not add or remove any '\n' or '\0' in tbuf
  */
-int dialog_textbox(const char *title, char *tbuf, int initial_height,
-                  int initial_width, int *keys, int *_vscroll, int *_hscroll,
-                  update_text_fn update_text, void *data)
+int dialog_textbox(const char *title, const char *tbuf, int initial_height,
+                  int initial_width, int *_vscroll, int *_hscroll,
+                  int (*extra_key_cb)(int, size_t, size_t, void *), void *data)
 {
        int i, x, y, cur_x, cur_y, key = 0;
        int height, width, boxh, boxw;
@@ -239,8 +230,7 @@ do_resize:
 
        /* Print first page of text */
        attr_clear(box, boxh, boxw, dlg.dialog.atr);
-       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x, update_text,
-                        data);
+       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
 
        while (!done) {
                key = wgetch(dialog);
@@ -259,8 +249,7 @@ do_resize:
                                begin_reached = 1;
                                page = buf;
                                refresh_text_box(dialog, box, boxh, boxw,
-                                                cur_y, cur_x, update_text,
-                                                data);
+                                                cur_y, cur_x);
                        }
                        break;
                case 'G':       /* Last page */
@@ -270,8 +259,7 @@ do_resize:
                        /* point to last char in buf */
                        page = buf + strlen(buf);
                        back_lines(boxh);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case 'K':       /* Previous line */
                case 'k':
@@ -280,8 +268,7 @@ do_resize:
                                break;
 
                        back_lines(page_length + 1);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case 'B':       /* Previous page */
                case 'b':
@@ -290,8 +277,7 @@ do_resize:
                        if (begin_reached)
                                break;
                        back_lines(page_length + boxh);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case 'J':       /* Next line */
                case 'j':
@@ -300,8 +286,7 @@ do_resize:
                                break;
 
                        back_lines(page_length - 1);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case KEY_NPAGE: /* Next page */
                case ' ':
@@ -310,8 +295,7 @@ do_resize:
                                break;
 
                        begin_reached = 0;
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case '0':       /* Beginning of line */
                case 'H':       /* Scroll left */
@@ -326,8 +310,7 @@ do_resize:
                                hscroll--;
                        /* Reprint current page to scroll horizontally */
                        back_lines(page_length);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case 'L':       /* Scroll right */
                case 'l':
@@ -337,8 +320,7 @@ do_resize:
                        hscroll++;
                        /* Reprint current page to scroll horizontally */
                        back_lines(page_length);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case KEY_ESC:
                        if (on_key_esc(dialog) == KEY_ESC)
@@ -351,11 +333,9 @@ do_resize:
                        on_key_resize();
                        goto do_resize;
                default:
-                       for (i = 0; keys[i]; i++) {
-                               if (key == keys[i]) {
-                                       done = true;
-                                       break;
-                               }
+                       if (extra_key_cb && extra_key_cb(key, start, end, data)) {
+                               done = true;
+                               break;
                        }
                }
        }
index 53d8834..eccc87a 100644 (file)
@@ -22,8 +22,6 @@
 #include "lkc.h"
 #include "lxdialog/dialog.h"
 
-#define JUMP_NB                        9
-
 static const char mconf_readme[] =
 "Overview\n"
 "--------\n"
@@ -288,6 +286,7 @@ static int single_menu_mode;
 static int show_all_options;
 static int save_and_exit;
 static int silent;
+static int jump_key_char;
 
 static void conf(struct menu *menu, struct menu *active_menu);
 
@@ -348,19 +347,19 @@ static void reset_subtitle(void)
        set_dialog_subtitles(subtitles);
 }
 
-static int show_textbox_ext(const char *title, char *text, int r, int c, int
-                           *keys, int *vscroll, int *hscroll, update_text_fn
-                           update_text, void *data)
+static int show_textbox_ext(const char *title, const char *text, int r, int c,
+                           int *vscroll, int *hscroll,
+                           int (*extra_key_cb)(int, size_t, size_t, void *),
+                           void *data)
 {
        dialog_clear();
-       return dialog_textbox(title, text, r, c, keys, vscroll, hscroll,
-                             update_text, data);
+       return dialog_textbox(title, text, r, c, vscroll, hscroll,
+                             extra_key_cb, data);
 }
 
 static void show_textbox(const char *title, const char *text, int r, int c)
 {
-       show_textbox_ext(title, (char *) text, r, c, (int []) {0}, NULL, NULL,
-                        NULL, NULL);
+       show_textbox_ext(title, text, r, c, NULL, NULL, NULL, NULL);
 }
 
 static void show_helptext(const char *title, const char *text)
@@ -381,35 +380,54 @@ static void show_help(struct menu *menu)
 
 struct search_data {
        struct list_head *head;
-       struct menu **targets;
-       int *keys;
+       struct menu *target;
 };
 
-static void update_text(char *buf, size_t start, size_t end, void *_data)
+static int next_jump_key(int key)
+{
+       if (key < '1' || key > '9')
+               return '1';
+
+       key++;
+
+       if (key > '9')
+               key = '1';
+
+       return key;
+}
+
+static int handle_search_keys(int key, size_t start, size_t end, void *_data)
 {
        struct search_data *data = _data;
        struct jump_key *pos;
-       int k = 0;
+       int index = 0;
+
+       if (key < '1' || key > '9')
+               return 0;
 
        list_for_each_entry(pos, data->head, entries) {
-               if (pos->offset >= start && pos->offset < end) {
-                       char header[4];
+               index = next_jump_key(index);
 
-                       if (k < JUMP_NB) {
-                               int key = '0' + (pos->index % JUMP_NB) + 1;
+               if (pos->offset < start)
+                       continue;
 
-                               sprintf(header, "(%c)", key);
-                               data->keys[k] = key;
-                               data->targets[k] = pos->target;
-                               k++;
-                       } else {
-                               sprintf(header, "   ");
-                       }
+               if (pos->offset >= end)
+                       break;
 
-                       memcpy(buf + pos->offset, header, sizeof(header) - 1);
+               if (key == index) {
+                       data->target = pos->target;
+                       return 1;
                }
        }
-       data->keys[k] = 0;
+
+       return 0;
+}
+
+int get_jump_key_char(void)
+{
+       jump_key_char = next_jump_key(jump_key_char);
+
+       return jump_key_char;
 }
 
 static void search_conf(void)
@@ -456,26 +474,23 @@ again:
        sym_arr = sym_re_search(dialog_input);
        do {
                LIST_HEAD(head);
-               struct menu *targets[JUMP_NB];
-               int keys[JUMP_NB + 1], i;
                struct search_data data = {
                        .head = &head,
-                       .targets = targets,
-                       .keys = keys,
                };
                struct jump_key *pos, *tmp;
 
+               jump_key_char = 0;
                res = get_relations_str(sym_arr, &head);
                set_subtitle();
                dres = show_textbox_ext("Search Results", str_get(&res), 0, 0,
-                                       keys, &vscroll, &hscroll, &update_text,
-                                       &data);
+                                       &vscroll, &hscroll,
+                                       handle_search_keys, &data);
                again = false;
-               for (i = 0; i < JUMP_NB && keys[i]; i++)
-                       if (dres == keys[i]) {
-                               conf(targets[i]->parent, targets[i]);
-                               again = true;
-                       }
+               if (dres >= '1' && dres <= '9') {
+                       assert(data.target != NULL);
+                       conf(data.target->parent, data.target);
+                       again = true;
+               }
                str_free(&res);
                list_for_each_entry_safe(pos, tmp, &head, entries)
                        free(pos);
index b90fff8..61c442d 100644 (file)
@@ -701,6 +701,11 @@ static void get_dep_str(struct gstr *r, struct expr *expr, const char *prefix)
        }
 }
 
+int __attribute__((weak)) get_jump_key_char(void)
+{
+       return -1;
+}
+
 static void get_prompt_str(struct gstr *r, struct property *prop,
                           struct list_head *head)
 {
@@ -730,24 +735,27 @@ static void get_prompt_str(struct gstr *r, struct property *prop,
        }
        if (head && location) {
                jump = xmalloc(sizeof(struct jump_key));
-
                jump->target = location;
-
-               if (list_empty(head))
-                       jump->index = 0;
-               else
-                       jump->index = list_entry(head->prev, struct jump_key,
-                                                entries)->index + 1;
-
                list_add_tail(&jump->entries, head);
        }
 
        str_printf(r, "  Location:\n");
-       for (j = 4; --i >= 0; j += 2) {
+       for (j = 0; --i >= 0; j++) {
+               int jk = -1;
+               int indent = 2 * j + 4;
+
                menu = submenu[i];
-               if (jump && menu == location)
+               if (jump && menu == location) {
                        jump->offset = strlen(r->s);
-               str_printf(r, "%*c-> %s", j, ' ', menu_get_prompt(menu));
+                       jk = get_jump_key_char();
+               }
+
+               if (jk >= 0) {
+                       str_printf(r, "(%c)", jk);
+                       indent -= 3;
+               }
+
+               str_printf(r, "%*c-> %s", indent, ' ', menu_get_prompt(menu));
                if (menu->sym) {
                        str_printf(r, " (%s [=%s])", menu->sym->name ?
                                menu->sym->name : "<choice>",
index 3ba8b1a..143a2c3 100644 (file)
@@ -220,7 +220,7 @@ search_help[] =
 "Location:\n"
 "  -> Bus options (PCI, PCMCIA, EISA, ISA)\n"
 "    -> PCI support (PCI [ = y])\n"
-"      -> PCI access mode (<choice> [ = y])\n"
+"(1)   -> PCI access mode (<choice> [ = y])\n"
 "Selects: LIBCRC32\n"
 "Selected by: BAR\n"
 "-----------------------------------------------------------------\n"
@@ -231,9 +231,13 @@ search_help[] =
 "o  The 'Depends on:' line lists symbols that need to be defined for\n"
 "   this symbol to be visible and selectable in the menu.\n"
 "o  The 'Location:' lines tell, where in the menu structure this symbol\n"
-"   is located.  A location followed by a [ = y] indicates that this is\n"
-"   a selectable menu item, and the current value is displayed inside\n"
-"   brackets.\n"
+"   is located.\n"
+"     A location followed by a [ = y] indicates that this is\n"
+"     a selectable menu item, and the current value is displayed inside\n"
+"     brackets.\n"
+"     Press the key in the (#) prefix to jump directly to that\n"
+"     location. You will be returned to the current search results\n"
+"     after exiting this new menu.\n"
 "o  The 'Selects:' line tells, what symbol will be automatically selected\n"
 "   if this symbol is selected (y or m).\n"
 "o  The 'Selected by' line tells what symbol has selected this symbol.\n"
@@ -275,7 +279,9 @@ static const char *current_instructions = menu_instructions;
 
 static char *dialog_input_result;
 static int dialog_input_result_len;
+static int jump_key_char;
 
+static void selected_conf(struct menu *menu, struct menu *active_menu);
 static void conf(struct menu *menu);
 static void conf_choice(struct menu *menu);
 static void conf_string(struct menu *menu);
@@ -685,6 +691,57 @@ static int do_exit(void)
        return 0;
 }
 
+struct search_data {
+       struct list_head *head;
+       struct menu *target;
+};
+
+static int next_jump_key(int key)
+{
+       if (key < '1' || key > '9')
+               return '1';
+
+       key++;
+
+       if (key > '9')
+               key = '1';
+
+       return key;
+}
+
+static int handle_search_keys(int key, size_t start, size_t end, void *_data)
+{
+       struct search_data *data = _data;
+       struct jump_key *pos;
+       int index = 0;
+
+       if (key < '1' || key > '9')
+               return 0;
+
+       list_for_each_entry(pos, data->head, entries) {
+               index = next_jump_key(index);
+
+               if (pos->offset < start)
+                       continue;
+
+               if (pos->offset >= end)
+                       break;
+
+               if (key == index) {
+                       data->target = pos->target;
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+int get_jump_key_char(void)
+{
+       jump_key_char = next_jump_key(jump_key_char);
+
+       return jump_key_char;
+}
 
 static void search_conf(void)
 {
@@ -692,7 +749,8 @@ static void search_conf(void)
        struct gstr res;
        struct gstr title;
        char *dialog_input;
-       int dres;
+       int dres, vscroll = 0, hscroll = 0;
+       bool again;
 
        title = str_new();
        str_printf( &title, "Enter (sub)string or regexp to search for "
@@ -721,11 +779,28 @@ again:
                dialog_input += strlen(CONFIG_);
 
        sym_arr = sym_re_search(dialog_input);
-       res = get_relations_str(sym_arr, NULL);
+
+       do {
+               LIST_HEAD(head);
+               struct search_data data = {
+                       .head = &head,
+                       .target = NULL,
+               };
+               jump_key_char = 0;
+               res = get_relations_str(sym_arr, &head);
+               dres = show_scroll_win_ext(main_window,
+                               "Search Results", str_get(&res),
+                               &vscroll, &hscroll,
+                               handle_search_keys, &data);
+               again = false;
+               if (dres >= '1' && dres <= '9') {
+                       assert(data.target != NULL);
+                       selected_conf(data.target->parent, data.target);
+                       again = true;
+               }
+               str_free(&res);
+       } while (again);
        free(sym_arr);
-       show_scroll_win(main_window,
-                       "Search Results", str_get(&res));
-       str_free(&res);
        str_free(&title);
 }
 
@@ -1063,9 +1138,14 @@ static int do_match(int key, struct match_state *state, int *ans)
 
 static void conf(struct menu *menu)
 {
+       selected_conf(menu, NULL);
+}
+
+static void selected_conf(struct menu *menu, struct menu *active_menu)
+{
        struct menu *submenu = NULL;
        struct symbol *sym;
-       int res;
+       int i, res;
        int current_index = 0;
        int last_top_row = 0;
        struct match_state match_state = {
@@ -1081,6 +1161,19 @@ static void conf(struct menu *menu)
                if (!child_count)
                        break;
 
+               if (active_menu != NULL) {
+                       for (i = 0; i < items_num; i++) {
+                               struct mitem *mcur;
+
+                               mcur = (struct mitem *) item_userptr(curses_menu_items[i]);
+                               if ((struct menu *) mcur->usrptr == active_menu) {
+                                       current_index = i;
+                                       break;
+                               }
+                       }
+                       active_menu = NULL;
+               }
+
                show_menu(menu_get_prompt(menu), menu_instructions,
                          current_index, &last_top_row);
                keypad((menu_win(curses_menu)), TRUE);
index 9aedf40..25a7263 100644 (file)
@@ -497,11 +497,18 @@ void refresh_all_windows(WINDOW *main_window)
        refresh();
 }
 
-/* layman's scrollable window... */
 void show_scroll_win(WINDOW *main_window,
                const char *title,
                const char *text)
 {
+       (void)show_scroll_win_ext(main_window, title, (char *)text, NULL, NULL, NULL, NULL);
+}
+
+/* layman's scrollable window... */
+int show_scroll_win_ext(WINDOW *main_window, const char *title, char *text,
+                       int *vscroll, int *hscroll,
+                       extra_key_cb_fn extra_key_cb, void *data)
+{
        int res;
        int total_lines = get_line_no(text);
        int x, y, lines, columns;
@@ -514,6 +521,12 @@ void show_scroll_win(WINDOW *main_window,
        WINDOW *win;
        WINDOW *pad;
        PANEL *panel;
+       bool done = false;
+
+       if (hscroll)
+               start_x = *hscroll;
+       if (vscroll)
+               start_y = *vscroll;
 
        getmaxyx(stdscr, lines, columns);
 
@@ -549,8 +562,7 @@ void show_scroll_win(WINDOW *main_window,
        panel = new_panel(win);
 
        /* handle scrolling */
-       do {
-
+       while (!done) {
                copywin(pad, win, start_y, start_x, 2, 2, text_lines,
                                text_cols, 0);
                print_in_middle(win,
@@ -593,8 +605,18 @@ void show_scroll_win(WINDOW *main_window,
                case 'l':
                        start_x++;
                        break;
+               default:
+                       if (extra_key_cb) {
+                               size_t start = (get_line(text, start_y) - text);
+                               size_t end = (get_line(text, start_y + text_lines) - text);
+
+                               if (extra_key_cb(res, start, end, data)) {
+                                       done = true;
+                                       break;
+                               }
+                       }
                }
-               if (res == 10 || res == 27 || res == 'q' ||
+               if (res == 0 || res == 10 || res == 27 || res == 'q' ||
                        res == KEY_F(F_HELP) || res == KEY_F(F_BACK) ||
                        res == KEY_F(F_EXIT))
                        break;
@@ -606,9 +628,14 @@ void show_scroll_win(WINDOW *main_window,
                        start_x = 0;
                if (start_x >= total_cols-text_cols)
                        start_x = total_cols-text_cols;
-       } while (res);
+       }
 
+       if (hscroll)
+               *hscroll = start_x;
+       if (vscroll)
+               *vscroll = start_y;
        del_panel(panel);
        delwin(win);
        refresh_all_windows(main_window);
+       return res;
 }
index 6f925bc..ab836d5 100644 (file)
@@ -67,6 +67,8 @@ typedef enum {
 
 void set_colors(void);
 
+typedef int (*extra_key_cb_fn)(int, size_t, size_t, void *);
+
 /* this changes the windows attributes !!! */
 void print_in_middle(WINDOW *win, int y, int width, const char *str, int attrs);
 int get_line_length(const char *line);
@@ -78,6 +80,9 @@ int dialog_inputbox(WINDOW *main_window,
                const char *title, const char *prompt,
                const char *init, char **resultp, int *result_len);
 void refresh_all_windows(WINDOW *main_window);
+int show_scroll_win_ext(WINDOW *main_window, const char *title, char *text,
+                       int *vscroll, int *hscroll,
+                       extra_key_cb_fn extra_key_cb, void *data);
 void show_scroll_win(WINDOW *main_window,
                const char *title,
                const char *text);
index 748da57..d1f5bcf 100644 (file)
@@ -396,6 +396,9 @@ static char *eval_clause(const char *str, size_t len, int argc, char *argv[])
 
                p++;
        }
+
+       if (new_argc >= FUNCTION_MAX_ARGS)
+               pperror("too many function arguments");
        new_argv[new_argc++] = prev;
 
        /*
index 117f36e..0e113b0 100755 (executable)
@@ -5,7 +5,8 @@ cflags=$1
 libs=$2
 bin=$3
 
-PKG="Qt5Core Qt5Gui Qt5Widgets"
+PKG5="Qt5Core Qt5Gui Qt5Widgets"
+PKG6="Qt6Core Qt6Gui Qt6Widgets"
 
 if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
        echo >&2 "*"
@@ -14,16 +15,26 @@ if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
        exit 1
 fi
 
-if ${HOSTPKG_CONFIG} --exists $PKG; then
-       ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags}
-       ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs}
+if ${HOSTPKG_CONFIG} --exists $PKG6; then
+       ${HOSTPKG_CONFIG} --cflags ${PKG6} > ${cflags}
+       # Qt6 requires C++17.
+       echo -std=c++17 >> ${cflags}
+       ${HOSTPKG_CONFIG} --libs ${PKG6} > ${libs}
+       ${HOSTPKG_CONFIG} --variable=libexecdir Qt6Core > ${bin}
+       exit 0
+fi
+
+if ${HOSTPKG_CONFIG} --exists $PKG5; then
+       ${HOSTPKG_CONFIG} --cflags ${PKG5} > ${cflags}
+       ${HOSTPKG_CONFIG} --libs ${PKG5} > ${libs}
        ${HOSTPKG_CONFIG} --variable=host_bins Qt5Core > ${bin}
        exit 0
 fi
 
 echo >&2 "*"
-echo >&2 "* Could not find Qt5 via ${HOSTPKG_CONFIG}."
-echo >&2 "* Please install Qt5 and make sure it's in PKG_CONFIG_PATH"
-echo >&2 "* You need $PKG"
+echo >&2 "* Could not find Qt6 or Qt5 via ${HOSTPKG_CONFIG}."
+echo >&2 "* Please install Qt6 or Qt5 and make sure it's in PKG_CONFIG_PATH"
+echo >&2 "* You need $PKG6 for Qt6"
+echo >&2 "* You need $PKG5 for Qt5"
 echo >&2 "*"
 exit 1
index 78087b2..620a352 100644 (file)
@@ -5,10 +5,10 @@
  */
 
 #include <QAction>
+#include <QActionGroup>
 #include <QApplication>
 #include <QCloseEvent>
 #include <QDebug>
-#include <QDesktopWidget>
 #include <QFileDialog>
 #include <QLabel>
 #include <QLayout>
@@ -16,6 +16,8 @@
 #include <QMenu>
 #include <QMenuBar>
 #include <QMessageBox>
+#include <QRegularExpression>
+#include <QScreen>
 #include <QToolBar>
 
 #include <stdlib.h>
@@ -1126,7 +1128,7 @@ QString ConfigInfoView::debug_info(struct symbol *sym)
 
 QString ConfigInfoView::print_filter(const QString &str)
 {
-       QRegExp re("[<>&\"\\n]");
+       QRegularExpression re("[<>&\"\\n]");
        QString res = str;
        for (int i = 0; (i = res.indexOf(re, i)) >= 0;) {
                switch (res[i].toLatin1()) {
@@ -1322,15 +1324,15 @@ ConfigMainWindow::ConfigMainWindow(void)
        int width, height;
        char title[256];
 
-       QDesktopWidget *d = configApp->desktop();
        snprintf(title, sizeof(title), "%s%s",
                rootmenu.prompt->text,
                ""
                );
        setWindowTitle(title);
 
-       width = configSettings->value("/window width", d->width() - 64).toInt();
-       height = configSettings->value("/window height", d->height() - 64).toInt();
+       QRect g = configApp->primaryScreen()->geometry();
+       width = configSettings->value("/window width", g.width() - 64).toInt();
+       height = configSettings->value("/window height", g.height() - 64).toInt();
        resize(width, height);
        x = configSettings->value("/window x");
        y = configSettings->value("/window y");
@@ -1379,17 +1381,17 @@ ConfigMainWindow::ConfigMainWindow(void)
                this, &ConfigMainWindow::goBack);
 
        QAction *quitAction = new QAction("&Quit", this);
-       quitAction->setShortcut(Qt::CTRL + Qt::Key_Q);
+       quitAction->setShortcut(Qt::CTRL | Qt::Key_Q);
        connect(quitAction, &QAction::triggered,
                this, &ConfigMainWindow::close);
 
        QAction *loadAction = new QAction(QPixmap(xpm_load), "&Load", this);
-       loadAction->setShortcut(Qt::CTRL + Qt::Key_L);
+       loadAction->setShortcut(Qt::CTRL | Qt::Key_L);
        connect(loadAction, &QAction::triggered,
                this, &ConfigMainWindow::loadConfig);
 
        saveAction = new QAction(QPixmap(xpm_save), "&Save", this);
-       saveAction->setShortcut(Qt::CTRL + Qt::Key_S);
+       saveAction->setShortcut(Qt::CTRL | Qt::Key_S);
        connect(saveAction, &QAction::triggered,
                this, &ConfigMainWindow::saveConfig);
 
@@ -1403,7 +1405,7 @@ ConfigMainWindow::ConfigMainWindow(void)
        connect(saveAsAction, &QAction::triggered,
                this, &ConfigMainWindow::saveConfigAs);
        QAction *searchAction = new QAction("&Find", this);
-       searchAction->setShortcut(Qt::CTRL + Qt::Key_F);
+       searchAction->setShortcut(Qt::CTRL | Qt::Key_F);
        connect(searchAction, &QAction::triggered,
                this, &ConfigMainWindow::searchConfig);
        singleViewAction = new QAction(QPixmap(xpm_single_view), "Single View", this);
@@ -1750,11 +1752,21 @@ void ConfigMainWindow::closeEvent(QCloseEvent* e)
                e->accept();
                return;
        }
-       QMessageBox mb("qconf", "Save configuration?", QMessageBox::Warning,
-                       QMessageBox::Yes | QMessageBox::Default, QMessageBox::No, QMessageBox::Cancel | QMessageBox::Escape);
-       mb.setButtonText(QMessageBox::Yes, "&Save Changes");
-       mb.setButtonText(QMessageBox::No, "&Discard Changes");
-       mb.setButtonText(QMessageBox::Cancel, "Cancel Exit");
+
+       QMessageBox mb(QMessageBox::Icon::Warning, "qconf",
+                      "Save configuration?");
+
+       QPushButton *yb = mb.addButton(QMessageBox::Yes);
+       QPushButton *db = mb.addButton(QMessageBox::No);
+       QPushButton *cb = mb.addButton(QMessageBox::Cancel);
+
+       yb->setText("&Save Changes");
+       db->setText("&Discard Changes");
+       cb->setText("Cancel Exit");
+
+       mb.setDefaultButton(yb);
+       mb.setEscapeButton(cb);
+
        switch (mb.exec()) {
        case QMessageBox::Yes:
                if (saveConfig())
index b29b297..34a5386 100644 (file)
@@ -24,6 +24,7 @@
 #include "../../include/linux/license.h"
 #include "../../include/linux/module_symbol.h"
 
+static bool module_enabled;
 /* Are we using CONFIG_MODVERSIONS? */
 static bool modversions;
 /* Is CONFIG_MODULE_SRCVERSION_ALL set? */
@@ -761,6 +762,7 @@ static const char *const section_white_list[] =
        ".fmt_slot*",                   /* EZchip */
        ".gnu.lto*",
        ".discard.*",
+       ".llvm.call-graph-profile",     /* call graph */
        NULL
 };
 
@@ -1242,7 +1244,7 @@ static void check_section_mismatch(struct module *mod, struct elf_info *elf,
        const char *tosec = sec_name(elf, get_secindex(elf, sym));
        const struct sectioncheck *mismatch;
 
-       if (elf->export_symbol_secndx == fsecndx) {
+       if (module_enabled && elf->export_symbol_secndx == fsecndx) {
                check_export_symbol(mod, elf, faddr, tosec, sym);
                return;
        }
@@ -1256,21 +1258,16 @@ static void check_section_mismatch(struct module *mod, struct elf_info *elf,
                                 tosec, taddr);
 }
 
-static int addend_386_rel(uint32_t *location, Elf_Rela *r)
+static Elf_Addr addend_386_rel(uint32_t *location, unsigned int r_type)
 {
-       unsigned int r_typ = ELF_R_TYPE(r->r_info);
-
-       switch (r_typ) {
+       switch (r_type) {
        case R_386_32:
-               r->r_addend = TO_NATIVE(*location);
-               break;
+               return TO_NATIVE(*location);
        case R_386_PC32:
-               r->r_addend = TO_NATIVE(*location) + 4;
-               break;
-       default:
-               r->r_addend = (Elf_Addr)(-1);
+               return TO_NATIVE(*location) + 4;
        }
-       return 0;
+
+       return (Elf_Addr)(-1);
 }
 
 #ifndef R_ARM_CALL
@@ -1314,32 +1311,28 @@ static int32_t sign_extend32(int32_t value, int index)
        return (int32_t)(value << shift) >> shift;
 }
 
-static int addend_arm_rel(void *loc, Elf_Sym *sym, Elf_Rela *r)
+static Elf_Addr addend_arm_rel(void *loc, Elf_Sym *sym, unsigned int r_type)
 {
-       unsigned int r_typ = ELF_R_TYPE(r->r_info);
        uint32_t inst, upper, lower, sign, j1, j2;
        int32_t offset;
 
-       switch (r_typ) {
+       switch (r_type) {
        case R_ARM_ABS32:
        case R_ARM_REL32:
                inst = TO_NATIVE(*(uint32_t *)loc);
-               r->r_addend = inst + sym->st_value;
-               break;
+               return inst + sym->st_value;
        case R_ARM_MOVW_ABS_NC:
        case R_ARM_MOVT_ABS:
                inst = TO_NATIVE(*(uint32_t *)loc);
                offset = sign_extend32(((inst & 0xf0000) >> 4) | (inst & 0xfff),
                                       15);
-               r->r_addend = offset + sym->st_value;
-               break;
+               return offset + sym->st_value;
        case R_ARM_PC24:
        case R_ARM_CALL:
        case R_ARM_JUMP24:
                inst = TO_NATIVE(*(uint32_t *)loc);
                offset = sign_extend32((inst & 0x00ffffff) << 2, 25);
-               r->r_addend = offset + sym->st_value + 8;
-               break;
+               return offset + sym->st_value + 8;
        case R_ARM_THM_MOVW_ABS_NC:
        case R_ARM_THM_MOVT_ABS:
                upper = TO_NATIVE(*(uint16_t *)loc);
@@ -1349,8 +1342,7 @@ static int addend_arm_rel(void *loc, Elf_Sym *sym, Elf_Rela *r)
                                       ((lower & 0x7000) >> 4) |
                                       (lower & 0x00ff),
                                       15);
-               r->r_addend = offset + sym->st_value;
-               break;
+               return offset + sym->st_value;
        case R_ARM_THM_JUMP19:
                /*
                 * Encoding T3:
@@ -1371,8 +1363,7 @@ static int addend_arm_rel(void *loc, Elf_Sym *sym, Elf_Rela *r)
                                       ((upper & 0x03f) << 12) |
                                       ((lower & 0x07ff) << 1),
                                       20);
-               r->r_addend = offset + sym->st_value + 4;
-               break;
+               return offset + sym->st_value + 4;
        case R_ARM_THM_CALL:
        case R_ARM_THM_JUMP24:
                /*
@@ -1398,34 +1389,26 @@ static int addend_arm_rel(void *loc, Elf_Sym *sym, Elf_Rela *r)
                                       ((upper & 0x03ff) << 12) |
                                       ((lower & 0x07ff) << 1),
                                       24);
-               r->r_addend = offset + sym->st_value + 4;
-               break;
-       default:
-               r->r_addend = (Elf_Addr)(-1);
+               return offset + sym->st_value + 4;
        }
-       return 0;
+
+       return (Elf_Addr)(-1);
 }
 
-static int addend_mips_rel(uint32_t *location, Elf_Rela *r)
+static Elf_Addr addend_mips_rel(uint32_t *location, unsigned int r_type)
 {
-       unsigned int r_typ = ELF_R_TYPE(r->r_info);
        uint32_t inst;
 
        inst = TO_NATIVE(*location);
-       switch (r_typ) {
+       switch (r_type) {
        case R_MIPS_LO16:
-               r->r_addend = inst & 0xffff;
-               break;
+               return inst & 0xffff;
        case R_MIPS_26:
-               r->r_addend = (inst & 0x03ffffff) << 2;
-               break;
+               return (inst & 0x03ffffff) << 2;
        case R_MIPS_32:
-               r->r_addend = inst;
-               break;
-       default:
-               r->r_addend = (Elf_Addr)(-1);
+               return inst;
        }
-       return 0;
+       return (Elf_Addr)(-1);
 }
 
 #ifndef EM_RISCV
@@ -1444,12 +1427,45 @@ static int addend_mips_rel(uint32_t *location, Elf_Rela *r)
 #define R_LARCH_SUB32          55
 #endif
 
+static void get_rel_type_and_sym(struct elf_info *elf, uint64_t r_info,
+                                unsigned int *r_type, unsigned int *r_sym)
+{
+       typedef struct {
+               Elf64_Word    r_sym;    /* Symbol index */
+               unsigned char r_ssym;   /* Special symbol for 2nd relocation */
+               unsigned char r_type3;  /* 3rd relocation type */
+               unsigned char r_type2;  /* 2nd relocation type */
+               unsigned char r_type;   /* 1st relocation type */
+       } Elf64_Mips_R_Info;
+
+       bool is_64bit = (elf->hdr->e_ident[EI_CLASS] == ELFCLASS64);
+
+       if (elf->hdr->e_machine == EM_MIPS && is_64bit) {
+               Elf64_Mips_R_Info *mips64_r_info = (void *)&r_info;
+
+               *r_type = mips64_r_info->r_type;
+               *r_sym = TO_NATIVE(mips64_r_info->r_sym);
+               return;
+       }
+
+       if (is_64bit) {
+               Elf64_Xword r_info64 = r_info;
+
+               r_info = TO_NATIVE(r_info64);
+       } else {
+               Elf32_Word r_info32 = r_info;
+
+               r_info = TO_NATIVE(r_info32);
+       }
+
+       *r_type = ELF_R_TYPE(r_info);
+       *r_sym = ELF_R_SYM(r_info);
+}
+
 static void section_rela(struct module *mod, struct elf_info *elf,
                         Elf_Shdr *sechdr)
 {
        Elf_Rela *rela;
-       Elf_Rela r;
-       unsigned int r_sym;
        unsigned int fsecndx = sechdr->sh_info;
        const char *fromsec = sec_name(elf, fsecndx);
        Elf_Rela *start = (void *)elf->hdr + sechdr->sh_offset;
@@ -1460,38 +1476,29 @@ static void section_rela(struct module *mod, struct elf_info *elf,
                return;
 
        for (rela = start; rela < stop; rela++) {
-               r.r_offset = TO_NATIVE(rela->r_offset);
-#if KERNEL_ELFCLASS == ELFCLASS64
-               if (elf->hdr->e_machine == EM_MIPS) {
-                       unsigned int r_typ;
-                       r_sym = ELF64_MIPS_R_SYM(rela->r_info);
-                       r_sym = TO_NATIVE(r_sym);
-                       r_typ = ELF64_MIPS_R_TYPE(rela->r_info);
-                       r.r_info = ELF64_R_INFO(r_sym, r_typ);
-               } else {
-                       r.r_info = TO_NATIVE(rela->r_info);
-                       r_sym = ELF_R_SYM(r.r_info);
-               }
-#else
-               r.r_info = TO_NATIVE(rela->r_info);
-               r_sym = ELF_R_SYM(r.r_info);
-#endif
-               r.r_addend = TO_NATIVE(rela->r_addend);
+               Elf_Addr taddr, r_offset;
+               unsigned int r_type, r_sym;
+
+               r_offset = TO_NATIVE(rela->r_offset);
+               get_rel_type_and_sym(elf, rela->r_info, &r_type, &r_sym);
+
+               taddr = TO_NATIVE(rela->r_addend);
+
                switch (elf->hdr->e_machine) {
                case EM_RISCV:
                        if (!strcmp("__ex_table", fromsec) &&
-                           ELF_R_TYPE(r.r_info) == R_RISCV_SUB32)
+                           r_type == R_RISCV_SUB32)
                                continue;
                        break;
                case EM_LOONGARCH:
                        if (!strcmp("__ex_table", fromsec) &&
-                           ELF_R_TYPE(r.r_info) == R_LARCH_SUB32)
+                           r_type == R_LARCH_SUB32)
                                continue;
                        break;
                }
 
                check_section_mismatch(mod, elf, elf->symtab_start + r_sym,
-                                      fsecndx, fromsec, r.r_offset, r.r_addend);
+                                      fsecndx, fromsec, r_offset, taddr);
        }
 }
 
@@ -1499,8 +1506,6 @@ static void section_rel(struct module *mod, struct elf_info *elf,
                        Elf_Shdr *sechdr)
 {
        Elf_Rel *rel;
-       Elf_Rela r;
-       unsigned int r_sym;
        unsigned int fsecndx = sechdr->sh_info;
        const char *fromsec = sec_name(elf, fsecndx);
        Elf_Rel *start = (void *)elf->hdr + sechdr->sh_offset;
@@ -1512,45 +1517,32 @@ static void section_rel(struct module *mod, struct elf_info *elf,
 
        for (rel = start; rel < stop; rel++) {
                Elf_Sym *tsym;
+               Elf_Addr taddr = 0, r_offset;
+               unsigned int r_type, r_sym;
                void *loc;
 
-               r.r_offset = TO_NATIVE(rel->r_offset);
-#if KERNEL_ELFCLASS == ELFCLASS64
-               if (elf->hdr->e_machine == EM_MIPS) {
-                       unsigned int r_typ;
-                       r_sym = ELF64_MIPS_R_SYM(rel->r_info);
-                       r_sym = TO_NATIVE(r_sym);
-                       r_typ = ELF64_MIPS_R_TYPE(rel->r_info);
-                       r.r_info = ELF64_R_INFO(r_sym, r_typ);
-               } else {
-                       r.r_info = TO_NATIVE(rel->r_info);
-                       r_sym = ELF_R_SYM(r.r_info);
-               }
-#else
-               r.r_info = TO_NATIVE(rel->r_info);
-               r_sym = ELF_R_SYM(r.r_info);
-#endif
-               r.r_addend = 0;
+               r_offset = TO_NATIVE(rel->r_offset);
+               get_rel_type_and_sym(elf, rel->r_info, &r_type, &r_sym);
 
-               loc = sym_get_data_by_offset(elf, fsecndx, r.r_offset);
+               loc = sym_get_data_by_offset(elf, fsecndx, r_offset);
                tsym = elf->symtab_start + r_sym;
 
                switch (elf->hdr->e_machine) {
                case EM_386:
-                       addend_386_rel(loc, &r);
+                       taddr = addend_386_rel(loc, r_type);
                        break;
                case EM_ARM:
-                       addend_arm_rel(loc, tsym, &r);
+                       taddr = addend_arm_rel(loc, tsym, r_type);
                        break;
                case EM_MIPS:
-                       addend_mips_rel(loc, &r);
+                       taddr = addend_mips_rel(loc, r_type);
                        break;
                default:
                        fatal("Please add code to calculate addend for this architecture\n");
                }
 
                check_section_mismatch(mod, elf, tsym,
-                                      fsecndx, fromsec, r.r_offset, r.r_addend);
+                                      fsecndx, fromsec, r_offset, taddr);
        }
 }
 
@@ -2272,7 +2264,7 @@ int main(int argc, char **argv)
        LIST_HEAD(dump_lists);
        struct dump_list *dl, *dl2;
 
-       while ((opt = getopt(argc, argv, "ei:mnT:to:au:WwENd:")) != -1) {
+       while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:")) != -1) {
                switch (opt) {
                case 'e':
                        external_module = true;
@@ -2282,6 +2274,9 @@ int main(int argc, char **argv)
                        dl->file = optarg;
                        list_add_tail(&dl->list, &dump_lists);
                        break;
+               case 'M':
+                       module_enabled = true;
+                       break;
                case 'm':
                        modversions = true;
                        break;
index dfdb948..5f94c2c 100644 (file)
 #define ELF_R_TYPE  ELF64_R_TYPE
 #endif
 
-/* The 64-bit MIPS ELF ABI uses an unusual reloc format. */
-typedef struct
-{
-       Elf32_Word    r_sym;    /* Symbol index */
-       unsigned char r_ssym;   /* Special symbol for 2nd relocation */
-       unsigned char r_type3;  /* 3rd relocation type */
-       unsigned char r_type2;  /* 2nd relocation type */
-       unsigned char r_type1;  /* 1st relocation type */
-} _Elf64_Mips_R_Info;
-
-typedef union
-{
-       Elf64_Xword             r_info_number;
-       _Elf64_Mips_R_Info      r_info_fields;
-} _Elf64_Mips_R_Info_union;
-
-#define ELF64_MIPS_R_SYM(i) \
-  ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_sym)
-
-#define ELF64_MIPS_R_TYPE(i) \
-  ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_type1)
-
 #if KERNEL_ELFDATA != HOST_ELFDATA
 
 static inline void __endian(const void *src, void *dest, unsigned int size)
index 032774e..bf3f856 100755 (executable)
@@ -162,34 +162,7 @@ install_kernel_headers () {
 
        rm -rf $pdir
 
-       (
-               cd $srctree
-               find . arch/$SRCARCH -maxdepth 1 -name Makefile\*
-               find include scripts -type f -o -type l
-               find arch/$SRCARCH -name Kbuild.platforms -o -name Platform
-               find $(find arch/$SRCARCH -name include -o -name scripts -type d) -type f
-       ) > debian/hdrsrcfiles
-
-       {
-               if is_enabled CONFIG_OBJTOOL; then
-                       echo tools/objtool/objtool
-               fi
-
-               find arch/$SRCARCH/include Module.symvers include scripts -type f
-
-               if is_enabled CONFIG_GCC_PLUGINS; then
-                       find scripts/gcc-plugins -name \*.so
-               fi
-       } > debian/hdrobjfiles
-
-       destdir=$pdir/usr/src/linux-headers-$version
-       mkdir -p $destdir
-       tar -c -f - -C $srctree -T debian/hdrsrcfiles | tar -xf - -C $destdir
-       tar -c -f - -T debian/hdrobjfiles | tar -xf - -C $destdir
-       rm -f debian/hdrsrcfiles debian/hdrobjfiles
-
-       # copy .config manually to be where it's expected to be
-       cp $KCONFIG_CONFIG $destdir/.config
+       "${srctree}/scripts/package/install-extmod-build" "${pdir}/usr/src/linux-headers-${version}"
 
        mkdir -p $pdir/lib/modules/$version/
        ln -s /usr/src/linux-headers-$version $pdir/lib/modules/$version/build
diff --git a/scripts/package/debian/rules b/scripts/package/debian/rules
new file mode 100755 (executable)
index 0000000..3dafa94
--- /dev/null
@@ -0,0 +1,33 @@
+#!/usr/bin/make -f
+# SPDX-License-Identifier: GPL-2.0-only
+
+include debian/rules.vars
+
+srctree ?= .
+
+ifneq (,$(filter-out parallel=1,$(filter parallel=%,$(DEB_BUILD_OPTIONS))))
+    NUMJOBS = $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
+    MAKEFLAGS += -j$(NUMJOBS)
+endif
+
+.PHONY: binary binary-indep binary-arch
+binary: binary-arch binary-indep
+binary-indep: build-indep
+binary-arch: build-arch
+       $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \
+       KERNELRELEASE=$(KERNELRELEASE) \
+       run-command KBUILD_RUN_COMMAND=+$(srctree)/scripts/package/builddeb
+
+.PHONY: build build-indep build-arch
+build: build-arch build-indep
+build-indep:
+build-arch:
+       $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \
+       KERNELRELEASE=$(KERNELRELEASE) \
+       $(shell $(srctree)/scripts/package/deb-build-option) \
+       olddefconfig all
+
+.PHONY: clean
+clean:
+       rm -rf debian/files debian/linux-*
+       $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) clean
diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build
new file mode 100755 (executable)
index 0000000..af7fe9f
--- /dev/null
@@ -0,0 +1,53 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Install the files needed to build external modules into the directory
+# given as the first argument.
+#
+# Reads srctree, SRCARCH and KCONFIG_CONFIG from the environment. Paths such
+# as include/config/auto.conf and Module.symvers are relative, so the script
+# presumably has to be run from the top of the build tree.
+
+set -e
+
+destdir=${1}
+
+test -n "${srctree}"
+test -n "${SRCARCH}"
+
+# Succeed if the given CONFIG option is set to 'y' in auto.conf.
+is_enabled() {
+       grep -q "^$1=y" include/config/auto.conf
+}
+
+mkdir -p "${destdir}"
+
+# Files copied from the source tree.
+(
+       cd "${srctree}"
+       echo Makefile
+       find "arch/${SRCARCH}" -maxdepth 1 -name 'Makefile*'
+       find include scripts -type f -o -type l
+       find "arch/${SRCARCH}" -name Kbuild.platforms -o -name Platform
+       # The inner find prints one directory per line. The command
+       # substitution must stay unquoted so that every directory becomes a
+       # separate start point; quoting it passes a single bogus path with
+       # embedded newlines and none of these files get copied.
+       find $(find "arch/${SRCARCH}" -name include -o -name scripts -type d) -type f
+) | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${destdir}"
+
+# Files copied from the current directory.
+{
+       if is_enabled CONFIG_OBJTOOL; then
+               echo tools/objtool/objtool
+       fi
+
+       find "arch/${SRCARCH}/include" Module.symvers include scripts -type f
+
+       if is_enabled CONFIG_GCC_PLUGINS; then
+               find scripts/gcc-plugins -name '*.so'
+       fi
+} | tar -c -f - -T - | tar -xf - -C "${destdir}"
+
+# copy .config manually to be where it's expected to be
+cp "${KCONFIG_CONFIG}" "${destdir}/.config"
diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec
new file mode 100644 (file)
index 0000000..ac3f2ee
--- /dev/null
@@ -0,0 +1,117 @@
+# _arch is undefined if /usr/lib/rpm/platform/*/macros was not included.
+%{!?_arch: %define _arch dummy}
+%{!?make: %define make make}
+%define makeflags %{?_smp_mflags} ARCH=%{ARCH}
+%define __spec_install_post /usr/lib/rpm/brp-compress || :
+%define debug_package %{nil}
+
+Name: kernel
+Summary: The Linux Kernel
+Version: %(echo %{KERNELRELEASE} | sed -e 's/-/_/g')
+Release: %{pkg_release}
+License: GPL
+Group: System Environment/Kernel
+Vendor: The Linux Community
+URL: https://www.kernel.org
+Source0: linux.tar.gz
+Source1: config
+Source2: diff.patch
+Provides: kernel-%{KERNELRELEASE}
+BuildRequires: bc binutils bison dwarves
+BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
+BuildRequires: gcc make openssl openssl-devel perl python3 rsync
+
+%description
+The Linux Kernel, the operating system core itself
+
+%package headers
+Summary: Header files for the Linux kernel for use by glibc
+Group: Development/System
+Obsoletes: kernel-headers
+Provides: kernel-headers = %{version}
+%description headers
+Kernel-headers includes the C header files that specify the interface
+between the Linux kernel and userspace libraries and programs.  The
+header files define structures and constants that are needed for
+building most standard programs and are also needed for rebuilding the
+glibc package.
+
+%if %{with_devel}
+%package devel
+Summary: Development package for building kernel modules to match the %{version} kernel
+Group: System Environment/Kernel
+AutoReqProv: no
+%description -n kernel-devel
+This package provides kernel headers and makefiles sufficient to build modules
+against the %{version} kernel package.
+%endif
+
+%prep
+%setup -q -n linux
+cp %{SOURCE1} .config
+patch -p1 < %{SOURCE2}
+
+%build
+%{make} %{makeflags} KERNELRELEASE=%{KERNELRELEASE} KBUILD_BUILD_VERSION=%{release}
+
+%install
+mkdir -p %{buildroot}/boot
+%ifarch ia64
+mkdir -p %{buildroot}/boot/efi
+cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/efi/vmlinuz-%{KERNELRELEASE}
+ln -s efi/vmlinuz-%{KERNELRELEASE} %{buildroot}/boot/
+%else
+cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE}
+%endif
+%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install
+%{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
+cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE}
+cp .config %{buildroot}/boot/config-%{KERNELRELEASE}
+ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build
+ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/source
+%if %{with_devel}
+%{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}'
+%endif
+
+%clean
+rm -rf %{buildroot}
+
+%post
+if [ -x /sbin/installkernel -a -r /boot/vmlinuz-%{KERNELRELEASE} -a -r /boot/System.map-%{KERNELRELEASE} ]; then
+cp /boot/vmlinuz-%{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm
+cp /boot/System.map-%{KERNELRELEASE} /boot/.System.map-%{KERNELRELEASE}-rpm
+rm -f /boot/vmlinuz-%{KERNELRELEASE} /boot/System.map-%{KERNELRELEASE}
+/sbin/installkernel %{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
+rm -f /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
+fi
+
+%preun
+if [ -x /sbin/new-kernel-pkg ]; then
+new-kernel-pkg --remove %{KERNELRELEASE} --rminitrd --initrdfile=/boot/initramfs-%{KERNELRELEASE}.img
+elif [ -x /usr/bin/kernel-install ]; then
+kernel-install remove %{KERNELRELEASE}
+fi
+
+%postun
+if [ -x /sbin/update-bootloader ]; then
+/sbin/update-bootloader --remove %{KERNELRELEASE}
+fi
+
+%files
+%defattr (-, root, root)
+/lib/modules/%{KERNELRELEASE}
+%exclude /lib/modules/%{KERNELRELEASE}/build
+%exclude /lib/modules/%{KERNELRELEASE}/source
+/boot/*
+
+%files headers
+%defattr (-, root, root)
+/usr/include
+
+%if %{with_devel}
+%files devel
+%defattr (-, root, root)
+/usr/src/kernels/%{KERNELRELEASE}
+/lib/modules/%{KERNELRELEASE}/build
+/lib/modules/%{KERNELRELEASE}/source
+%endif
index ba2453e..5044224 100755 (executable)
@@ -263,34 +263,11 @@ Description: Linux kernel debugging symbols for $version
 EOF
 fi
 
-cat <<EOF > debian/rules
-#!$(command -v $MAKE) -f
-
-srctree ?= .
-KERNELRELEASE = ${KERNELRELEASE}
-
-.PHONY: clean build build-arch build-indep binary binary-arch binary-indep
-
-build-indep:
-build-arch:
-       \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \
-       KERNELRELEASE=\$(KERNELRELEASE) \
-       \$(shell \$(srctree)/scripts/package/deb-build-option) \
-       olddefconfig all
-
-build: build-arch
-
-binary-indep:
-binary-arch: build-arch
-       \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \
-       KERNELRELEASE=\$(KERNELRELEASE) intdeb-pkg
-
-clean:
-       rm -rf debian/files debian/linux-*
-       \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} clean
-
-binary: binary-arch
+cat <<EOF > debian/rules.vars
+ARCH := ${ARCH}
+KERNELRELEASE := ${KERNELRELEASE}
 EOF
-chmod +x debian/rules
+
+cp "${srctree}/scripts/package/debian/rules" debian/
 
 exit 0
index 8049f0e..d41608e 100755 (executable)
 #      Patched for non-x86 by Opencon (L) 2002 <opencon@rio.skydome.net>
 #
 
-# how we were called determines which rpms we build and how we build them
-if [ "$1" = prebuilt ]; then
-       S=DEL
-       MAKE="$MAKE -f $srctree/Makefile"
-else
-       S=
-
-       mkdir -p rpmbuild/SOURCES
-       cp linux.tar.gz rpmbuild/SOURCES
-       cp "${KCONFIG_CONFIG}" rpmbuild/SOURCES/config
-       "${srctree}/scripts/package/gen-diff-patch" rpmbuild/SOURCES/diff.patch
-fi
-
 if grep -q CONFIG_MODULES=y include/config/auto.conf; then
-       M=
+echo '%define with_devel %{?_without_devel: 0} %{?!_without_devel: 1}'
 else
-       M=DEL
+echo '%define with_devel 0'
 fi
 
-__KERNELRELEASE=$(echo $KERNELRELEASE | sed -e "s/-/_/g")
-EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \
---exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation \
---exclude=.config.old --exclude=.missing-syscalls.d --exclude=*.s"
-
-# We can label the here-doc lines for conditional output to the spec file
-#
-# Labels:
-#  $S: this line is enabled only when building source package
-#  $M: this line is enabled only when CONFIG_MODULES is enabled
-sed -e '/^DEL/d' -e 's/^\t*//' <<EOF
-       Name: kernel
-       Summary: The Linux Kernel
-       Version: $__KERNELRELEASE
-       Release: $(cat .version 2>/dev/null || echo 1)
-       License: GPL
-       Group: System Environment/Kernel
-       Vendor: The Linux Community
-       URL: https://www.kernel.org
-$S     Source0: linux.tar.gz
-$S     Source1: config
-$S     Source2: diff.patch
-       Provides: kernel-$KERNELRELEASE
-$S     BuildRequires: bc binutils bison dwarves
-$S     BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
-$S     BuildRequires: gcc make openssl openssl-devel perl python3 rsync
-
-       # $UTS_MACHINE as a fallback of _arch in case
-       # /usr/lib/rpm/platform/*/macros was not included.
-       %define _arch %{?_arch:$UTS_MACHINE}
-       %define __spec_install_post /usr/lib/rpm/brp-compress || :
-       %define debug_package %{nil}
-
-       %description
-       The Linux Kernel, the operating system core itself
-
-       %package headers
-       Summary: Header files for the Linux kernel for use by glibc
-       Group: Development/System
-       Obsoletes: kernel-headers
-       Provides: kernel-headers = %{version}
-       %description headers
-       Kernel-headers includes the C header files that specify the interface
-       between the Linux kernel and userspace libraries and programs.  The
-       header files define structures and constants that are needed for
-       building most standard programs and are also needed for rebuilding the
-       glibc package.
-
-$S$M   %package devel
-$S$M   Summary: Development package for building kernel modules to match the $__KERNELRELEASE kernel
-$S$M   Group: System Environment/Kernel
-$S$M   AutoReqProv: no
-$S$M   %description -n kernel-devel
-$S$M   This package provides kernel headers and makefiles sufficient to build modules
-$S$M   against the $__KERNELRELEASE kernel package.
-$S$M
-$S     %prep
-$S     %setup -q -n linux
-$S     cp %{SOURCE1} .config
-$S     patch -p1 < %{SOURCE2}
-$S
-$S     %build
-$S     $MAKE %{?_smp_mflags} KERNELRELEASE=$KERNELRELEASE KBUILD_BUILD_VERSION=%{release}
-$S
-       %install
-       mkdir -p %{buildroot}/boot
-       %ifarch ia64
-       mkdir -p %{buildroot}/boot/efi
-       cp \$($MAKE -s image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE
-       ln -s efi/vmlinuz-$KERNELRELEASE %{buildroot}/boot/
-       %else
-       cp \$($MAKE -s image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE
-       %endif
-$M     $MAKE %{?_smp_mflags} INSTALL_MOD_PATH=%{buildroot} modules_install
-       $MAKE %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
-       cp System.map %{buildroot}/boot/System.map-$KERNELRELEASE
-       cp .config %{buildroot}/boot/config-$KERNELRELEASE
-$S$M   rm -f %{buildroot}/lib/modules/$KERNELRELEASE/build
-$S$M   rm -f %{buildroot}/lib/modules/$KERNELRELEASE/source
-$S$M   mkdir -p %{buildroot}/usr/src/kernels/$KERNELRELEASE
-$S$M   tar cf - $EXCLUDES . | tar xf - -C %{buildroot}/usr/src/kernels/$KERNELRELEASE
-$S$M   cd %{buildroot}/lib/modules/$KERNELRELEASE
-$S$M   ln -sf /usr/src/kernels/$KERNELRELEASE build
-$S$M   ln -sf /usr/src/kernels/$KERNELRELEASE source
-
-       %clean
-       rm -rf %{buildroot}
-
-       %post
-       if [ -x /sbin/installkernel -a -r /boot/vmlinuz-$KERNELRELEASE -a -r /boot/System.map-$KERNELRELEASE ]; then
-       cp /boot/vmlinuz-$KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm
-       cp /boot/System.map-$KERNELRELEASE /boot/.System.map-$KERNELRELEASE-rpm
-       rm -f /boot/vmlinuz-$KERNELRELEASE /boot/System.map-$KERNELRELEASE
-       /sbin/installkernel $KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm
-       rm -f /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm
-       fi
-
-       %preun
-       if [ -x /sbin/new-kernel-pkg ]; then
-       new-kernel-pkg --remove $KERNELRELEASE --rminitrd --initrdfile=/boot/initramfs-$KERNELRELEASE.img
-       elif [ -x /usr/bin/kernel-install ]; then
-       kernel-install remove $KERNELRELEASE
-       fi
-
-       %postun
-       if [ -x /sbin/update-bootloader ]; then
-       /sbin/update-bootloader --remove $KERNELRELEASE
-       fi
-
-       %files
-       %defattr (-, root, root)
-$M     /lib/modules/$KERNELRELEASE
-$M     %exclude /lib/modules/$KERNELRELEASE/build
-$M     %exclude /lib/modules/$KERNELRELEASE/source
-       /boot/*
-
-       %files headers
-       %defattr (-, root, root)
-       /usr/include
-$S$M
-$S$M   %files devel
-$S$M   %defattr (-, root, root)
-$S$M   /usr/src/kernels/$KERNELRELEASE
-$S$M   /lib/modules/$KERNELRELEASE/build
-$S$M   /lib/modules/$KERNELRELEASE/source
+cat<<EOF
+%define ARCH ${ARCH}
+%define KERNELRELEASE ${KERNELRELEASE}
+%define pkg_release $("${srctree}/init/build-version")
 EOF
+
+cat "${srctree}/scripts/package/kernel.spec"
index f3659ea..8b1a636 100755 (executable)
@@ -37,3 +37,5 @@ rm -f .scmversion
 rm -rf include/ksym
 
 find . -name '*.usyms' | xargs rm -f
+
+rm -f binkernel.spec
index 3d3baba..38b96c6 100755 (executable)
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 #
 # This scripts adds local version information from the version
-# control systems git, mercurial (hg) and subversion (svn).
+# control system git.
 #
 # If something goes wrong, send a mail to the kernel build mailing list
 # (see MAINTAINERS) and CC Nico Schottelius
@@ -57,21 +57,37 @@ scm_version()
                return
        fi
 
-       # If a localversion*' file and the corresponding annotated tag exist,
-       # use it. This is the case in linux-next.
+       # mainline kernel:  6.2.0-rc5  ->  v6.2-rc5
+       # stable kernel:    6.1.7      ->  v6.1.7
+       version_tag=v$(echo "${KERNELVERSION}" | sed -E 's/^([0-9]+\.[0-9]+)\.0(.*)$/\1\2/')
+
+       # If a localversion* file exists, and the corresponding
+       # annotated tag exists and is an ancestor of HEAD, use
+       # it. This is the case in linux-next.
        tag=${file_localversion#-}
-       tag=$(git describe --exact-match --match=$tag $tag 2>/dev/null)
+       desc=
+       if [ -n "${tag}" ]; then
+               desc=$(git describe --match=$tag 2>/dev/null)
+       fi
+
+       # Otherwise, if a localversion* file exists, and the tag
+       # obtained by appending it to the tag derived from
+       # KERNELVERSION exists and is an ancestor of HEAD, use
+       # it. This is e.g. the case in linux-rt.
+       if [ -z "${desc}" ] && [ -n "${file_localversion}" ]; then
+               tag="${version_tag}${file_localversion}"
+               desc=$(git describe --match=$tag 2>/dev/null)
+       fi
 
        # Otherwise, default to the annotated tag derived from KERNELVERSION.
-       #   mainline kernel:  6.2.0-rc5  ->  v6.2-rc5
-       #   stable kernel:    6.1.7      ->  v6.1.7
-       if [ -z "${tag}" ]; then
-               tag=v$(echo "${KERNELVERSION}" | sed -E 's/^([0-9]+\.[0-9]+)\.0(.*)$/\1\2/')
+       if [ -z "${desc}" ]; then
+               tag="${version_tag}"
+               desc=$(git describe --match=$tag 2>/dev/null)
        fi
 
        # If we are at the tagged commit, we ignore it because the version is
        # well-defined.
-       if [ -z "$(git describe --exact-match --match=$tag 2>/dev/null)" ]; then
+       if [ "${tag}" != "${desc}" ]; then
 
                # If only the short version is requested, don't bother
                # running further git commands
@@ -81,8 +97,8 @@ scm_version()
                fi
                # If we are past the tagged commit, we pretty print it.
                # (like 6.1.0-14595-g292a089d78d3)
-               if atag="$(git describe --match=$tag 2>/dev/null)"; then
-                       echo "$atag" | awk -F- '{printf("-%05d", $(NF-1))}'
+               if [ -n "${desc}" ]; then
+                       echo "${desc}" | awk -F- '{printf("-%05d", $(NF-1))}'
                fi
 
                # Add -g and exactly 12 hex chars.
index d43231b..55b1df8 100644 (file)
@@ -67,7 +67,7 @@ struct landlock_rule {
         * @layers: Stack of layers, from the latest to the newest, implemented
         * as a flexible array member (FAM).
         */
-       struct landlock_layer layers[];
+       struct landlock_layer layers[] __counted_by(num_layers);
 };
 
 /**
index 4859fb1..a11cd7d 100644 (file)
@@ -1992,8 +1992,8 @@ static int default_write_copy(struct snd_pcm_substream *substream,
                              int channel, unsigned long hwoff,
                              struct iov_iter *iter, unsigned long bytes)
 {
-       if (!copy_from_iter(get_dma_ptr(substream->runtime, channel, hwoff),
-                           bytes, iter))
+       if (copy_from_iter(get_dma_ptr(substream->runtime, channel, hwoff),
+                          bytes, iter) != bytes)
                return -EFAULT;
        return 0;
 }
@@ -2025,8 +2025,8 @@ static int default_read_copy(struct snd_pcm_substream *substream,
                             int channel, unsigned long hwoff,
                             struct iov_iter *iter, unsigned long bytes)
 {
-       if (!copy_to_iter(get_dma_ptr(substream->runtime, channel, hwoff),
-                         bytes, iter))
+       if (copy_to_iter(get_dma_ptr(substream->runtime, channel, hwoff),
+                        bytes, iter) != bytes)
                return -EFAULT;
        return 0;
 }
index 174585b..b603bb9 100644 (file)
@@ -187,8 +187,13 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char
        err = expand_var_event(event, 0, len, buf, in_kernel);
        if (err < 0)
                return err;
-       if (len != newlen)
-               memset(buf + len, 0, newlen - len);
+       if (len != newlen) {
+               if (in_kernel)
+                       memset(buf + len, 0, newlen - len);
+               else if (clear_user((__force void __user *)buf + len,
+                                   newlen - len))
+                       return -EFAULT;
+       }
        return newlen;
 }
 EXPORT_SYMBOL(snd_seq_expand_var_event);
index c05935c..9234d4f 100644 (file)
@@ -456,7 +456,7 @@ static int emu8k_pcm_silence(struct snd_pcm_substream *subs,
        /* convert to word unit */
        pos = (pos << 1) + rec->loop_start[voice];
        count <<= 1;
-       LOOP_WRITE(rec, pos, USER_SOCKPTR(NULL), count);
+       LOOP_WRITE(rec, pos, NULL, count);
        return 0;
 }
 
index 0ba1fbc..6278999 100644 (file)
@@ -888,7 +888,7 @@ static void cs42l42_resume(struct sub_codec *cs42l42)
 
        /* Initialize CS42L42 companion codec */
        cs8409_i2c_bulk_write(cs42l42, cs42l42->init_seq, cs42l42->init_seq_num);
-       usleep_range(30000, 35000);
+       msleep(CS42L42_INIT_TIMEOUT_MS);
 
        /* Clear interrupts, by reading interrupt status registers */
        cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs));
index 2a8dfb4..937e938 100644 (file)
@@ -229,6 +229,7 @@ enum cs8409_coefficient_index_registers {
 #define CS42L42_I2C_SLEEP_US                   (2000)
 #define CS42L42_PDN_TIMEOUT_US                 (250000)
 #define CS42L42_PDN_SLEEP_US                   (2000)
+#define CS42L42_INIT_TIMEOUT_MS                        (45)
 #define CS42L42_FULL_SCALE_VOL_MASK            (2)
 #define CS42L42_FULL_SCALE_VOL_0DB             (1)
 #define CS42L42_FULL_SCALE_VOL_MINUS6DB                (0)
index a07df6f..b7e78bf 100644 (file)
@@ -7057,6 +7057,27 @@ static void alc295_fixup_dell_inspiron_top_speakers(struct hda_codec *codec,
        }
 }
 
+/* Forcibly assign NID 0x03 to HP and NID 0x02 to SPK */
+static void alc287_fixup_bind_dacs(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+       static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */
+       static const hda_nid_t preferred_pairs[] = {
+               0x17, 0x02, 0x21, 0x03, 0
+       };
+
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn);
+       spec->gen.preferred_dacs = preferred_pairs;
+       spec->gen.auto_mute_via_amp = 1;
+       snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
+                           0x0); /* Make sure 0x14 is disabled */
+}
+
+
 enum {
        ALC269_FIXUP_GPIO2,
        ALC269_FIXUP_SONY_VAIO,
@@ -7319,6 +7340,7 @@ enum {
        ALC287_FIXUP_TAS2781_I2C,
        ALC245_FIXUP_HP_MUTE_LED_COEFBIT,
        ALC245_FIXUP_HP_X360_MUTE_LEDS,
+       ALC287_FIXUP_THINKPAD_I2S_SPK,
 };
 
 /* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9413,6 +9435,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC245_FIXUP_HP_GPIO_LED
        },
+       [ALC287_FIXUP_THINKPAD_I2S_SPK] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc287_fixup_bind_dacs,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -10544,6 +10570,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x17, 0x90170111},
                {0x19, 0x03a11030},
                {0x21, 0x03211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK,
+               {0x17, 0x90170110},
+               {0x19, 0x03a11030},
+               {0x21, 0x03211020}),
        SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE,
                {0x12, 0x90a60130},
                {0x17, 0x90170110},
index 37114fd..fb80280 100644 (file)
@@ -173,16 +173,6 @@ static int tasdevice_get_profile_id(struct snd_kcontrol *kcontrol,
        return 0;
 }
 
-static int tasdevice_hda_clamp(int val, int max)
-{
-       if (val > max)
-               val = max;
-
-       if (val < 0)
-               val = 0;
-       return val;
-}
-
 static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
                struct snd_ctl_elem_value *ucontrol)
 {
@@ -191,7 +181,7 @@ static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
        int max = tas_priv->rcabin.ncfgs - 1;
        int val, ret = 0;
 
-       val = tasdevice_hda_clamp(nr_profile, max);
+       val = clamp(nr_profile, 0, max);
 
        if (tas_priv->rcabin.profile_cfg_id != val) {
                tas_priv->rcabin.profile_cfg_id = val;
@@ -248,7 +238,7 @@ static int tasdevice_program_put(struct snd_kcontrol *kcontrol,
        int max = tas_fw->nr_programs - 1;
        int val, ret = 0;
 
-       val = tasdevice_hda_clamp(nr_program, max);
+       val = clamp(nr_program, 0, max);
 
        if (tas_priv->cur_prog != val) {
                tas_priv->cur_prog = val;
@@ -277,7 +267,7 @@ static int tasdevice_config_put(struct snd_kcontrol *kcontrol,
        int max = tas_fw->nr_configurations - 1;
        int val, ret = 0;
 
-       val = tasdevice_hda_clamp(nr_config, max);
+       val = clamp(nr_config, 0, max);
 
        if (tas_priv->cur_conf != val) {
                tas_priv->cur_conf = val;
index b304b35..3ec15b4 100644 (file)
@@ -217,6 +217,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                .driver_data = &acp6x_card,
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "82TL"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "82V2"),
                }
        },
@@ -328,6 +335,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
        {
                .driver_data = &acp6x_card,
                .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+                       DMI_MATCH(DMI_BOARD_NAME, "8A3E"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "MECHREVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "MRID6"),
                }
index afe213a..dcc4e14 100644 (file)
@@ -954,7 +954,7 @@ static int mchp_pdmc_dt_init(struct mchp_pdmc *dd)
 /* used to clean the channel index found on RHR's MSB */
 static int mchp_pdmc_process(struct snd_pcm_substream *substream,
                             int channel, unsigned long hwoff,
-                            struct iov_iter *buf, unsigned long bytes)
+                            unsigned long bytes)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        u8 *dma_ptr = runtime->dma_area + hwoff +
index 95b5bd8..f1e1dbc 100644 (file)
@@ -1968,11 +1968,15 @@ config SND_SOC_UDA1380
        tristate
        depends on I2C
 
+config SND_SOC_WCD_CLASSH
+       tristate
+
 config SND_SOC_WCD9335
        tristate "WCD9335 Codec"
        depends on SLIMBUS
        select REGMAP_SLIMBUS
        select REGMAP_IRQ
+       select SND_SOC_WCD_CLASSH
        help
          The WCD9335 is a standalone Hi-Fi audio CODEC IC, supports
          Qualcomm Technologies, Inc. (QTI) multimedia solutions,
@@ -1987,6 +1991,7 @@ config SND_SOC_WCD934X
        depends on SLIMBUS
        select REGMAP_IRQ
        select REGMAP_SLIMBUS
+       select SND_SOC_WCD_CLASSH
        select SND_SOC_WCD_MBHC
        depends on MFD_WCD934X || COMPILE_TEST
        help
@@ -1997,6 +2002,7 @@ config SND_SOC_WCD938X
        depends on SND_SOC_WCD938X_SDW
        tristate
        depends on SOUNDWIRE || !SOUNDWIRE
+       select SND_SOC_WCD_CLASSH
 
 config SND_SOC_WCD938X_SDW
        tristate "WCD9380/WCD9385 Codec - SDW"
index c8502a4..a87e569 100644 (file)
@@ -303,10 +303,11 @@ snd-soc-twl4030-objs := twl4030.o
 snd-soc-twl6040-objs := twl6040.o
 snd-soc-uda1334-objs := uda1334.o
 snd-soc-uda1380-objs := uda1380.o
+snd-soc-wcd-classh-objs := wcd-clsh-v2.o
 snd-soc-wcd-mbhc-objs := wcd-mbhc-v2.o
-snd-soc-wcd9335-objs := wcd-clsh-v2.o wcd9335.o
-snd-soc-wcd934x-objs := wcd-clsh-v2.o wcd934x.o
-snd-soc-wcd938x-objs := wcd938x.o wcd-clsh-v2.o
+snd-soc-wcd9335-objs := wcd9335.o
+snd-soc-wcd934x-objs := wcd934x.o
+snd-soc-wcd938x-objs := wcd938x.o
 snd-soc-wcd938x-sdw-objs := wcd938x-sdw.o
 snd-soc-wl1273-objs := wl1273.o
 snd-soc-wm-adsp-objs := wm_adsp.o
@@ -685,6 +686,7 @@ obj-$(CONFIG_SND_SOC_TWL4030)       += snd-soc-twl4030.o
 obj-$(CONFIG_SND_SOC_TWL6040)  += snd-soc-twl6040.o
 obj-$(CONFIG_SND_SOC_UDA1334)  += snd-soc-uda1334.o
 obj-$(CONFIG_SND_SOC_UDA1380)  += snd-soc-uda1380.o
+obj-$(CONFIG_SND_SOC_WCD_CLASSH)       += snd-soc-wcd-classh.o
 obj-$(CONFIG_SND_SOC_WCD_MBHC) += snd-soc-wcd-mbhc.o
 obj-$(CONFIG_SND_SOC_WCD9335)  += snd-soc-wcd9335.o
 obj-$(CONFIG_SND_SOC_WCD934X)  += snd-soc-wcd934x.o
index d1edb98..be4f422 100644 (file)
@@ -279,7 +279,7 @@ static const struct snd_kcontrol_new cs35l45_dsp_muxes[] = {
 };
 
 static const struct snd_kcontrol_new cs35l45_dac_muxes[] = {
-       SOC_DAPM_ENUM("DACPCM1 Source", cs35l45_dacpcm_enums[0]),
+       SOC_DAPM_ENUM("DACPCM Source", cs35l45_dacpcm_enums[0]),
 };
 
 static const struct snd_soc_dapm_widget cs35l45_dapm_widgets[] = {
@@ -333,7 +333,7 @@ static const struct snd_soc_dapm_widget cs35l45_dapm_widgets[] = {
        SND_SOC_DAPM_MUX("DSP_RX7 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dsp_muxes[6]),
        SND_SOC_DAPM_MUX("DSP_RX8 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dsp_muxes[7]),
 
-       SND_SOC_DAPM_MUX("DACPCM1 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dac_muxes[0]),
+       SND_SOC_DAPM_MUX("DACPCM Source", SND_SOC_NOPM, 0, 0, &cs35l45_dac_muxes[0]),
 
        SND_SOC_DAPM_OUT_DRV("AMP", SND_SOC_NOPM, 0, 0, NULL, 0),
 
@@ -403,7 +403,7 @@ static const struct snd_soc_dapm_route cs35l45_dapm_routes[] = {
        { "ASP_RX1", NULL, "ASP_EN" },
        { "ASP_RX2", NULL, "ASP_EN" },
 
-       { "AMP", NULL, "DACPCM1 Source"},
+       { "AMP", NULL, "DACPCM Source"},
        { "AMP", NULL, "GLOBAL_EN"},
 
        CS35L45_DSP_MUX_ROUTE("DSP_RX1"),
@@ -427,7 +427,7 @@ static const struct snd_soc_dapm_route cs35l45_dapm_routes[] = {
        {"DSP1 Preload", NULL, "DSP1 Preloader"},
        {"DSP1", NULL, "DSP1 Preloader"},
 
-       CS35L45_DAC_MUX_ROUTE("DACPCM1"),
+       CS35L45_DAC_MUX_ROUTE("DACPCM"),
 
        { "SPK", NULL, "AMP"},
 };
@@ -969,7 +969,7 @@ static irqreturn_t cs35l45_dsp_virt2_mbox_cb(int irq, void *data)
 
        ret = regmap_read(cs35l45->regmap, CS35L45_DSP_VIRT2_MBOX_3, &mbox_val);
        if (!ret && mbox_val)
-               ret = cs35l45_dsp_virt2_mbox3_irq_handle(cs35l45, mbox_val & CS35L45_MBOX3_CMD_MASK,
+               cs35l45_dsp_virt2_mbox3_irq_handle(cs35l45, mbox_val & CS35L45_MBOX3_CMD_MASK,
                                (mbox_val & CS35L45_MBOX3_DATA_MASK) >> CS35L45_MBOX3_DATA_SHIFT);
 
        /* Handle DSP trace log IRQ */
@@ -1078,6 +1078,7 @@ static int cs35l45_initialize(struct cs35l45_private *cs35l45)
 
        switch (dev_id[0]) {
        case 0x35A450:
+       case 0x35A460:
                break;
        default:
                dev_err(cs35l45->dev, "Bad DEVID 0x%x\n", dev_id[0]);
index ae373f3..98b1e63 100644 (file)
@@ -243,26 +243,27 @@ int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base)
 {
        unsigned int reg;
        unsigned int val;
-       int ret;
+       int read_ret, poll_ret;
 
        if (cs35l56_base->rev < CS35L56_REVID_B0)
                reg = CS35L56_DSP1_HALO_STATE_A1;
        else
                reg = CS35L56_DSP1_HALO_STATE;
 
-       ret = regmap_read_poll_timeout(cs35l56_base->regmap, reg,
-                                      val,
-                                      (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE),
-                                      CS35L56_HALO_STATE_POLL_US,
-                                      CS35L56_HALO_STATE_TIMEOUT_US);
-
-       if ((ret < 0) && (ret != -ETIMEDOUT)) {
-               dev_err(cs35l56_base->dev, "Failed to read HALO_STATE: %d\n", ret);
-               return ret;
-       }
-
-       if ((ret == -ETIMEDOUT) || (val != CS35L56_HALO_STATE_BOOT_DONE)) {
-               dev_err(cs35l56_base->dev, "Firmware boot fail: HALO_STATE=%#x\n", val);
+       /*
+        * This can't be a regmap_read_poll_timeout() because cs35l56 will NAK
+        * I2C until it has booted which would terminate the poll
+        */
+       poll_ret = read_poll_timeout(regmap_read, read_ret,
+                                    (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE),
+                                    CS35L56_HALO_STATE_POLL_US,
+                                    CS35L56_HALO_STATE_TIMEOUT_US,
+                                    false,
+                                    cs35l56_base->regmap, reg, &val);
+
+       if (poll_ret) {
+               dev_err(cs35l56_base->dev, "Firmware boot timed out(%d): HALO_STATE=%#x\n",
+                       read_ret, val);
                return -EIO;
        }
 
index 24e718e..1a95c37 100644 (file)
@@ -2205,7 +2205,8 @@ static int cs42l43_codec_probe(struct platform_device *pdev)
        // Don't use devm as we need to get against the MFD device
        priv->mclk = clk_get_optional(cs42l43->dev, "mclk");
        if (IS_ERR(priv->mclk)) {
-               dev_err_probe(priv->dev, PTR_ERR(priv->mclk), "Failed to get mclk\n");
+               ret = PTR_ERR(priv->mclk);
+               dev_err_probe(priv->dev, ret, "Failed to get mclk\n");
                goto err_pm;
        }
 
index 038d93e..1a137ca 100644 (file)
@@ -3269,13 +3269,17 @@ static int rt5645_component_set_jack(struct snd_soc_component *component,
 {
        struct snd_soc_jack *mic_jack = NULL;
        struct snd_soc_jack *btn_jack = NULL;
-       int *type = (int *)data;
+       int type;
 
-       if (*type & SND_JACK_MICROPHONE)
-               mic_jack = hs_jack;
-       if (*type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
-               SND_JACK_BTN_2 | SND_JACK_BTN_3))
-               btn_jack = hs_jack;
+       if (hs_jack) {
+               type = *(int *)data;
+
+               if (type & SND_JACK_MICROPHONE)
+                       mic_jack = hs_jack;
+               if (type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+                       SND_JACK_BTN_2 | SND_JACK_BTN_3))
+                       btn_jack = hs_jack;
+       }
 
        return rt5645_set_jack_detect(component, hs_jack, mic_jack, btn_jack);
 }
index a75db27..d96e23e 100644 (file)
@@ -355,6 +355,7 @@ void wcd_clsh_set_hph_mode(struct wcd_clsh_ctrl *ctrl, int mode)
                wcd_clsh_v2_set_hph_mode(comp, mode);
 
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_set_hph_mode);
 
 static void wcd_clsh_set_flyback_current(struct snd_soc_component *comp,
                                         int mode)
@@ -869,11 +870,13 @@ int wcd_clsh_ctrl_set_state(struct wcd_clsh_ctrl *ctrl,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_set_state);
 
 int wcd_clsh_ctrl_get_state(struct wcd_clsh_ctrl *ctrl)
 {
        return ctrl->state;
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_get_state);
 
 struct wcd_clsh_ctrl *wcd_clsh_ctrl_alloc(struct snd_soc_component *comp,
                                          int version)
@@ -890,8 +893,13 @@ struct wcd_clsh_ctrl *wcd_clsh_ctrl_alloc(struct snd_soc_component *comp,
 
        return ctrl;
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_alloc);
 
 void wcd_clsh_ctrl_free(struct wcd_clsh_ctrl *ctrl)
 {
        kfree(ctrl);
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_free);
+
+MODULE_DESCRIPTION("WCD93XX Class-H driver");
+MODULE_LICENSE("GPL");
index 1fbb2c2..8565a53 100644 (file)
@@ -796,6 +796,28 @@ static int avs_component_probe(struct snd_soc_component *component)
 
        ret = avs_load_topology(component, filename);
        kfree(filename);
+       if (ret == -ENOENT && !strncmp(mach->tplg_filename, "hda-", 4)) {
+               unsigned int vendor_id;
+
+               if (sscanf(mach->tplg_filename, "hda-%08x-tplg.bin", &vendor_id) != 1)
+                       return ret;
+
+               if (((vendor_id >> 16) & 0xFFFF) == 0x8086)
+                       mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL,
+                                                            "hda-8086-generic-tplg.bin");
+               else
+                       mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL,
+                                                            "hda-generic-tplg.bin");
+
+               filename = kasprintf(GFP_KERNEL, "%s/%s", component->driver->topology_name_prefix,
+                                    mach->tplg_filename);
+               if (!filename)
+                       return -ENOMEM;
+
+               dev_info(card->dev, "trying to load fallback topology %s\n", mach->tplg_filename);
+               ret = avs_load_topology(component, filename);
+               kfree(filename);
+       }
        if (ret < 0)
                return ret;
 
index f18406d..ba7c0ae 100644 (file)
@@ -1054,7 +1054,7 @@ int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream)
 
 int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
                               int channel, unsigned long pos,
-                              struct iov_iter *buf, unsigned long bytes)
+                              struct iov_iter *iter, unsigned long bytes)
 {
        struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
        struct snd_soc_component *component;
@@ -1065,7 +1065,7 @@ int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
                if (component->driver->copy)
                        return soc_component_ret(component,
                                component->driver->copy(component, substream,
-                                       channel, pos, buf, bytes));
+                                       channel, pos, iter, bytes));
 
        return -EINVAL;
 }
index ff21665..d0653d7 100644 (file)
@@ -290,29 +290,29 @@ static snd_pcm_uframes_t dmaengine_pcm_pointer(
 static int dmaengine_copy(struct snd_soc_component *component,
                          struct snd_pcm_substream *substream,
                          int channel, unsigned long hwoff,
-                         struct iov_iter *buf, unsigned long bytes)
+                         struct iov_iter *iter, unsigned long bytes)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct dmaengine_pcm *pcm = soc_component_to_pcm(component);
        int (*process)(struct snd_pcm_substream *substream,
                       int channel, unsigned long hwoff,
-                      struct iov_iter *buf, unsigned long bytes) = pcm->config->process;
+                      unsigned long bytes) = pcm->config->process;
        bool is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
        void *dma_ptr = runtime->dma_area + hwoff +
                        channel * (runtime->dma_bytes / runtime->channels);
 
        if (is_playback)
-               if (copy_from_iter(dma_ptr, bytes, buf) != bytes)
+               if (copy_from_iter(dma_ptr, bytes, iter) != bytes)
                        return -EFAULT;
 
        if (process) {
-               int ret = process(substream, channel, hwoff, buf, bytes);
+               int ret = process(substream, channel, hwoff, bytes);
                if (ret < 0)
                        return ret;
        }
 
        if (!is_playback)
-               if (copy_to_iter(dma_ptr, bytes, buf) != bytes)
+               if (copy_to_iter(dma_ptr, bytes, iter) != bytes)
                        return -EFAULT;
 
        return 0;
index f9b5d59..0acc848 100644 (file)
@@ -1246,7 +1246,7 @@ static const struct snd_soc_dai_ops stm32_sai_pcm_dai_ops2 = {
 
 static int stm32_sai_pcm_process_spdif(struct snd_pcm_substream *substream,
                                       int channel, unsigned long hwoff,
-                                      struct iov_iter *buf, unsigned long bytes)
+                                      unsigned long bytes)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
index a27e244..1ec177f 100644 (file)
@@ -265,7 +265,7 @@ static void free_midi_urbs(struct snd_usb_midi2_endpoint *ep)
 
        if (!ep)
                return;
-       for (i = 0; i < ep->num_urbs; ++i) {
+       for (i = 0; i < NUM_URBS; ++i) {
                ctx = &ep->urbs[i];
                if (!ctx->urb)
                        break;
@@ -279,6 +279,7 @@ static void free_midi_urbs(struct snd_usb_midi2_endpoint *ep)
 }
 
 /* allocate URBs for an EP */
+/* the callers should handle allocation errors via free_midi_urbs() */
 static int alloc_midi_urbs(struct snd_usb_midi2_endpoint *ep)
 {
        struct snd_usb_midi2_urb *ctx;
@@ -351,8 +352,10 @@ static int snd_usb_midi_v2_open(struct snd_ump_endpoint *ump, int dir)
                return -EIO;
        if (ep->direction == STR_OUT) {
                err = alloc_midi_urbs(ep);
-               if (err)
+               if (err) {
+                       free_midi_urbs(ep);
                        return err;
+               }
        }
        return 0;
 }
index 1f6d904..798e60b 100644 (file)
 #define X86_FEATURE_SEV_ES             (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
 #define X86_FEATURE_V_TSC_AUX          (19*32+ 9) /* "" Virtual TSC_AUX */
 #define X86_FEATURE_SME_COHERENT       (19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP         (19*32+14) /* AMD SEV-ES full debug state swap support */
 
 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP  (20*32+ 0) /* "" No Nested Data Breakpoints */
index 0b214f6..2e5c231 100644 (file)
@@ -83,7 +83,7 @@ const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
 #define perf_event_name(array, id) ({                  \
        const char *event_str = NULL;                   \
                                                        \
-       if ((id) >= 0 && (id) < ARRAY_SIZE(array))      \
+       if ((id) < ARRAY_SIZE(array))                   \
                event_str = array[id];                  \
        event_str;                                      \
 })
index 6c1da51..1c5606c 100644 (file)
@@ -8,8 +8,8 @@ TARGETS=page-types slabinfo page_owner_sort
 LIB_DIR = ../lib/api
 LIBS = $(LIB_DIR)/libapi.a
 
-CFLAGS += -Wall -Wextra -I../lib/
-LDFLAGS += $(LIBS)
+CFLAGS += -Wall -Wextra -I../lib/ -pthread
+LDFLAGS += $(LIBS) -pthread
 
 all: $(TARGETS)
 
index dc53180..b53753d 100644 (file)
@@ -57,7 +57,7 @@ LIB_MIN=                      1
 
 PACKAGE =                      cpupower
 PACKAGE_BUGREPORT =            linux-pm@vger.kernel.org
-LANGUAGES =                    de fr it cs pt
+LANGUAGES =                    de fr it cs pt ka
 
 
 # Directory definitions. These are default and most probably
index edef49f..caede9b 100644 (file)
@@ -50,14 +50,17 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
        test_cgroup_storage \
        test_tcpnotify_user test_sysctl \
        test_progs-no_alu32
+TEST_INST_SUBDIRS := no_alu32
 
 # Also test bpf-gcc, if present
 ifneq ($(BPF_GCC),)
 TEST_GEN_PROGS += test_progs-bpf_gcc
+TEST_INST_SUBDIRS += bpf_gcc
 endif
 
 ifneq ($(CLANG_CPUV4),)
 TEST_GEN_PROGS += test_progs-cpuv4
+TEST_INST_SUBDIRS += cpuv4
 endif
 
 TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
@@ -714,3 +717,12 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)     \
 
 # Delete partially updated (corrupted) files on error
 .DELETE_ON_ERROR:
+
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+       $(DEFAULT_INSTALL_RULE)
+       @for DIR in $(TEST_INST_SUBDIRS); do              \
+               mkdir -p $(INSTALL_PATH)/$$DIR;   \
+               rsync -a $(OUTPUT)/$$DIR/*.bpf.o $(INSTALL_PATH)/$$DIR;\
+       done
+endef
index 31f1e81..ee0458a 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/unistd.h>
 #include <linux/mount.h>
 #include <sys/syscall.h>
+#include "bpf/libbpf_internal.h"
 
 static inline int sys_fsopen(const char *fsname, unsigned flags)
 {
@@ -155,7 +156,7 @@ static void validate_pin(int map_fd, const char *map_name, int src_value,
        ASSERT_OK(err, "obj_pin");
 
        /* cleanup */
-       if (pin_opts.path_fd >= 0)
+       if (path_kind == PATH_FD_REL && pin_opts.path_fd >= 0)
                close(pin_opts.path_fd);
        if (old_cwd[0])
                ASSERT_OK(chdir(old_cwd), "restore_cwd");
@@ -220,7 +221,7 @@ static void validate_get(int map_fd, const char *map_name, int src_value,
                goto cleanup;
 
        /* cleanup */
-       if (get_opts.path_fd >= 0)
+       if (path_kind == PATH_FD_REL && get_opts.path_fd >= 0)
                close(get_opts.path_fd);
        if (old_cwd[0])
                ASSERT_OK(chdir(old_cwd), "restore_cwd");
index 911345c..ccc7685 100644 (file)
 #include "test_d_path_check_rdonly_mem.skel.h"
 #include "test_d_path_check_types.skel.h"
 
+/* sys_close_range has not been around for a long time, so let's
+ * make sure we can call it on systems with older glibc
+ */
+#ifndef __NR_close_range
+#ifdef __alpha__
+#define __NR_close_range 546
+#else
+#define __NR_close_range 436
+#endif
+#endif
+
 static int duration;
 
 static struct {
@@ -90,7 +101,11 @@ static int trigger_fstat_events(pid_t pid)
        fstat(indicatorfd, &fileStat);
 
 out_close:
-       /* triggers filp_close */
+       /* sys_close no longer triggers filp_close, but we can
+        * call sys_close_range instead which still does
+        */
+#define close(fd) syscall(__NR_close_range, fd, fd, 0)
+
        close(pipefd[0]);
        close(pipefd[1]);
        close(sockfd);
@@ -98,6 +113,8 @@ out_close:
        close(devfd);
        close(localfd);
        close(indicatorfd);
+
+#undef close
        return ret;
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c
new file mode 100644 (file)
index 0000000..f35852d
--- /dev/null
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "sk_storage_omem_uncharge.skel.h"
+
+/* Userspace half of the sk_storage omem uncharge test.
+ *
+ * Loads the skeleton, creates an AF_INET6 stream socket, publishes the
+ * socket's SO_COOKIE to the BPF side through skel->bss->cookie, stores a
+ * value in the sk_storage map for that socket (first 0, then 0xdeadbeef),
+ * attaches the BPF programs and closes the socket.  The test passes when
+ * the BPF programs bumped cookie_found twice and the sk_omem_alloc value
+ * they sampled into skel->bss->omem is 0.
+ */
+void test_sk_storage_omem_uncharge(void)
+{
+       struct sk_storage_omem_uncharge *skel;
+       int sk_fd = -1, map_fd, err, value;
+       socklen_t optlen;
+
+       skel = sk_storage_omem_uncharge__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+               return;
+       map_fd = bpf_map__fd(skel->maps.sk_storage);
+
+       /* A standalone socket not binding to addr:port,
+        * so netns is not needed.
+        */
+       sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
+       if (!ASSERT_GE(sk_fd, 0, "socket"))
+               goto done;
+
+       /* Let the BPF programs identify this socket by its cookie. */
+       optlen = sizeof(skel->bss->cookie);
+       err = getsockopt(sk_fd, SOL_SOCKET, SO_COOKIE, &skel->bss->cookie, &optlen);
+       if (!ASSERT_OK(err, "getsockopt(SO_COOKIE)"))
+               goto done;
+
+       /* The map element is keyed by the socket fd. */
+       value = 0;
+       err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+       if (!ASSERT_OK(err, "bpf_map_update_elem(value=0)"))
+               goto done;
+
+       /* Overwrite with the marker value the fentry program looks for. */
+       value = 0xdeadbeef;
+       err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+       if (!ASSERT_OK(err, "bpf_map_update_elem(value=0xdeadbeef)"))
+               goto done;
+
+       err = sk_storage_omem_uncharge__attach(skel);
+       if (!ASSERT_OK(err, "attach"))
+               goto done;
+
+       /* Mark the fd as closed so the cleanup path does not close it again. */
+       close(sk_fd);
+       sk_fd = -1;
+
+       ASSERT_EQ(skel->bss->cookie_found, 2, "cookie_found");
+       ASSERT_EQ(skel->bss->omem, 0, "omem");
+
+done:
+       sk_storage_omem_uncharge__destroy(skel);
+       if (sk_fd != -1)
+               close(sk_fd);
+}
index d126654..36d829a 100644 (file)
                __ret;                                                         \
        })
 
+/* Wait for @fd to become writable and check its pending socket error.
+ *
+ * Calls select() on @fd's write set with a timeout of @timeout_sec
+ * seconds, then reads SO_ERROR from the socket.
+ *
+ * Returns 0 when select() reports the fd writable and SO_ERROR is 0.
+ * Returns -1 otherwise, with errno set to ETIME when select() timed out,
+ * or to the SO_ERROR value when that value is non-zero; on select() or
+ * getsockopt() failure errno is whatever those calls left behind.
+ */
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+       struct timeval timeout = { .tv_sec = timeout_sec };
+       fd_set wfds;
+       int r, eval;
+       socklen_t esize = sizeof(eval);
+
+       FD_ZERO(&wfds);
+       FD_SET(fd, &wfds);
+
+       r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+       /* r == 0 means the timeout expired with no fd ready. */
+       if (r == 0)
+               errno = ETIME;
+       /* Only one fd is watched, so anything but 1 is a failure. */
+       if (r != 1)
+               return -1;
+
+       if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+               return -1;
+       /* Report the socket's pending error through errno. */
+       if (eval != 0) {
+               errno = eval;
+               return -1;
+       }
+
+       return 0;
+}
+
 static inline int poll_read(int fd, unsigned int timeout_sec)
 {
        struct timeval timeout = { .tv_sec = timeout_sec };
index 5674a9d..8df8cbb 100644 (file)
@@ -1452,11 +1452,18 @@ static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
        if (p < 0)
                goto close_cli;
 
+       if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
+               FAIL_ERRNO("poll_connect");
+               goto close_acc;
+       }
+
        *v0 = p;
        *v1 = c;
 
        return 0;
 
+close_acc:
+       close(p);
 close_cli:
        close(c);
 close_srv:
index cfed4df..0b793a1 100644 (file)
@@ -88,6 +88,7 @@
 #define sk_v6_rcv_saddr                __sk_common.skc_v6_rcv_saddr
 #define sk_flags               __sk_common.skc_flags
 #define sk_reuse               __sk_common.skc_reuse
+#define sk_cookie              __sk_common.skc_cookie
 
 #define s6_addr32              in6_u.u6_addr32
 
diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
new file mode 100644 (file)
index 0000000..3e74579
--- /dev/null
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+void *local_storage_ptr = NULL;
+void *sk_ptr = NULL;
+int cookie_found = 0;
+__u64 cookie = 0;
+__u32 omem = 0;
+
+void *bpf_rdonly_cast(void *, __u32) __ksym;
+
+/* BPF_MAP_TYPE_SK_STORAGE map with an int key and an int value; looked up
+ * per socket by the inet6_sock_destruct program via bpf_sk_storage_get().
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, int);
+} sk_storage SEC(".maps");
+
+/* fexit program on bpf_local_storage_destroy().
+ *
+ * Acts only when @local_storage is the pointer previously saved in
+ * local_storage_ptr and the saved sock (sk_ptr) still carries the expected
+ * cookie.  In that case it increments cookie_found, copies the sock's
+ * sk_omem_alloc counter into omem, and clears local_storage_ptr so the
+ * same storage is not matched again.  Always returns 0.
+ */
+SEC("fexit/bpf_local_storage_destroy")
+int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
+{
+       struct sock *sk;
+
+       if (local_storage_ptr != local_storage)
+               return 0;
+
+       /* sk_ptr is an untyped global; cast it so the sock fields can be read. */
+       sk = bpf_rdonly_cast(sk_ptr, bpf_core_type_id_kernel(struct sock));
+       if (sk->sk_cookie.counter != cookie)
+               return 0;
+
+       cookie_found++;
+       omem = sk->sk_omem_alloc.counter;
+       local_storage_ptr = NULL;
+
+       return 0;
+}
+
+/* fentry program on inet6_sock_destruct().
+ *
+ * Ignores every socket until a non-zero cookie has been set, and then any
+ * socket whose cookie differs from it.  For the matching socket it looks up
+ * the sk_storage value; if that value is 0xdeadbeef it increments
+ * cookie_found and records the sock pointer and its sk_bpf_storage pointer
+ * in sk_ptr and local_storage_ptr.  Always returns 0.
+ */
+SEC("fentry/inet6_sock_destruct")
+int BPF_PROG(inet6_sock_destruct, struct sock *sk)
+{
+       int *value;
+
+       if (!cookie || sk->sk_cookie.counter != cookie)
+               return 0;
+
+       /* Flags of 0: look up only, do not create a new element. */
+       value = bpf_sk_storage_get(&sk_storage, sk, 0, 0);
+       if (value && *value == 0xdeadbeef) {
+               cookie_found++;
+               sk_ptr = sk;
+               local_storage_ptr = sk->sk_bpf_storage;
+       }
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index c692cc8..a3bb36f 100644 (file)
@@ -23,6 +23,7 @@ LIBKVM += lib/guest_modes.c
 LIBKVM += lib/io.c
 LIBKVM += lib/kvm_util.c
 LIBKVM += lib/memstress.c
+LIBKVM += lib/guest_sprintf.c
 LIBKVM += lib/rbtree.c
 LIBKVM += lib/sparsebit.c
 LIBKVM += lib/test_util.c
@@ -122,6 +123,7 @@ TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += guest_print_test
 TEST_GEN_PROGS_x86_64 += hardware_disable_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += kvm_page_table_test
@@ -140,7 +142,6 @@ TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
 TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
 TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
 TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
 TEST_GEN_PROGS_aarch64 += aarch64/psci_test
@@ -152,6 +153,8 @@ TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
+TEST_GEN_PROGS_aarch64 += guest_print_test
+TEST_GEN_PROGS_aarch64 += get-reg-list
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_aarch64 += kvm_page_table_test
 TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
@@ -166,8 +169,10 @@ TEST_GEN_PROGS_s390x += s390x/resets
 TEST_GEN_PROGS_s390x += s390x/sync_regs_test
 TEST_GEN_PROGS_s390x += s390x/tprot
 TEST_GEN_PROGS_s390x += s390x/cmma_test
+TEST_GEN_PROGS_s390x += s390x/debug_test
 TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
+TEST_GEN_PROGS_s390x += guest_print_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
 TEST_GEN_PROGS_s390x += kvm_page_table_test
 TEST_GEN_PROGS_s390x += rseq_test
@@ -176,11 +181,15 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test
 
 TEST_GEN_PROGS_riscv += demand_paging_test
 TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += guest_print_test
+TEST_GEN_PROGS_riscv += get-reg-list
 TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
 TEST_GEN_PROGS_riscv += kvm_page_table_test
 TEST_GEN_PROGS_riscv += set_memory_region_test
 TEST_GEN_PROGS_riscv += kvm_binary_stats_test
 
+SPLIT_TESTS += get-reg-list
+
 TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
 TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
@@ -204,6 +213,7 @@ endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -Wno-gnu-variable-sized-type-not-at-end -MD\
        -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
+       -fno-builtin-strnlen \
        -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
        -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
        -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
@@ -228,11 +238,14 @@ LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
 LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
 LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
 LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+SPLIT_TESTS_TARGETS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
+SPLIT_TESTS_OBJS := $(patsubst %, $(ARCH_DIR)/%.o, $(SPLIT_TESTS))
 
 TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
 TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
 TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
 TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TESTS_OBJS))
 -include $(TEST_DEP_FILES)
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o
@@ -240,7 +253,10 @@ $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o
 $(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
        $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
-EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) cscope.*
+$(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS)
+       $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
+
+EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) $(SPLIT_TESTS_OBJS) cscope.*
 
 x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
 $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
index 4951ac5..b905808 100644 (file)
@@ -98,7 +98,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
                uint64_t val;
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
 
                /*
                 * Expect the ioctl to succeed with no effect on the register
@@ -107,7 +107,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
                vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
        }
 }
 
@@ -127,14 +127,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
                uint64_t val;
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
 
                r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
                TEST_ASSERT(r < 0 && errno == EINVAL,
                            "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
        }
 }
 
index 8ef3709..274b846 100644 (file)
@@ -19,7 +19,6 @@
  *
  * Copyright (c) 2021, Google LLC.
  */
-
 #define _GNU_SOURCE
 
 #include <stdlib.h>
@@ -155,11 +154,13 @@ static void guest_validate_irq(unsigned int intid,
        xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
 
        /* Make sure we are dealing with the correct timer IRQ */
-       GUEST_ASSERT_2(intid == timer_irq, intid, timer_irq);
+       GUEST_ASSERT_EQ(intid, timer_irq);
 
        /* Basic 'timer condition met' check */
-       GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us);
-       GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl);
+       __GUEST_ASSERT(xcnt >= cval,
+                      "xcnt = 0x%llx, cval = 0x%llx, xcnt_diff_us = 0x%llx",
+                      xcnt, cval, xcnt_diff_us);
+       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xcnt = 0x%llx", xcnt);
 
        WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
 }
@@ -192,8 +193,7 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
                        TIMER_TEST_ERR_MARGIN_US);
 
                irq_iter = READ_ONCE(shared_data->nr_iter);
-               GUEST_ASSERT_2(config_iter + 1 == irq_iter,
-                               config_iter + 1, irq_iter);
+               GUEST_ASSERT_EQ(config_iter + 1, irq_iter);
        }
 }
 
@@ -243,13 +243,9 @@ static void *test_vcpu_run(void *arg)
                break;
        case UCALL_ABORT:
                sync_global_from_guest(vm, *shared_data);
-               REPORT_GUEST_ASSERT_N(uc, "values: %lu, %lu; %lu, vcpu %u; stage; %u; iter: %u",
-                                     GUEST_ASSERT_ARG(uc, 0),
-                                     GUEST_ASSERT_ARG(uc, 1),
-                                     GUEST_ASSERT_ARG(uc, 2),
-                                     vcpu_idx,
-                                     shared_data->guest_stage,
-                                     shared_data->nr_iter);
+               fprintf(stderr, "Guest assert failed,  vcpu %u; stage; %u; iter: %u\n",
+                       vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
+               REPORT_GUEST_ASSERT(uc);
                break;
        default:
                TEST_FAIL("Unexpected guest exit\n");
index 637be79..f5b6cb3 100644 (file)
@@ -365,7 +365,7 @@ static void guest_wp_handler(struct ex_regs *regs)
 
 static void guest_ss_handler(struct ex_regs *regs)
 {
-       GUEST_ASSERT_1(ss_idx < 4, ss_idx);
+       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%u'", ss_idx);
        ss_addr[ss_idx++] = regs->pc;
        regs->pstate |= SPSR_SS;
 }
@@ -410,8 +410,8 @@ static void guest_code_ss(int test_cnt)
                /* Userspace disables Single Step when the end is nigh. */
                asm volatile("iter_ss_end:\n");
 
-               GUEST_ASSERT(bvr == w_bvr);
-               GUEST_ASSERT(wvr == w_wvr);
+               GUEST_ASSERT_EQ(bvr, w_bvr);
+               GUEST_ASSERT_EQ(wvr, w_wvr);
        }
        GUEST_DONE();
 }
@@ -450,7 +450,7 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp
        vcpu_run(vcpu);
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+               REPORT_GUEST_ASSERT(uc);
                break;
        case UCALL_DONE:
                goto done;
index 4f10055..709d7d7 100644 (file)
@@ -4,50 +4,17 @@
  *
  * Copyright (C) 2020, Red Hat, Inc.
  *
- * When attempting to migrate from a host with an older kernel to a host
- * with a newer kernel we allow the newer kernel on the destination to
- * list new registers with get-reg-list. We assume they'll be unused, at
- * least until the guest reboots, and so they're relatively harmless.
- * However, if the destination host with the newer kernel is missing
- * registers which the source host with the older kernel has, then that's
- * a regression in get-reg-list. This test checks for that regression by
- * checking the current list against a blessed list. We should never have
- * missing registers, but if new ones appear then they can probably be
- * added to the blessed list. A completely new blessed list can be created
- * by running the test with the --list command line argument.
- *
- * Note, the blessed list should be created from the oldest possible
- * kernel. We can't go older than v4.15, though, because that's the first
- * release to expose the ID system registers in KVM_GET_REG_LIST, see
- * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features
- * from guests"). Also, one must use the --core-reg-fixup command line
- * option when running on an older kernel that doesn't include df205b5c6328
- * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST")
+ * While the blessed list should be created from the oldest possible
+ * kernel, we can't go older than v5.2, though, because that's the first
+ * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
+ * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
+ * registers won't match expectations.
  */
 #include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/wait.h>
 #include "kvm_util.h"
 #include "test_util.h"
 #include "processor.h"
 
-static struct kvm_reg_list *reg_list;
-static __u64 *blessed_reg, blessed_n;
-
-struct reg_sublist {
-       const char *name;
-       long capability;
-       int feature;
-       bool finalize;
-       __u64 *regs;
-       __u64 regs_n;
-       __u64 *rejects_set;
-       __u64 rejects_set_n;
-};
-
 struct feature_id_reg {
        __u64 reg;
        __u64 id_reg;
@@ -76,70 +43,7 @@ static struct feature_id_reg feat_id_regs[] = {
        }
 };
 
-struct vcpu_config {
-       char *name;
-       struct reg_sublist sublists[];
-};
-
-static struct vcpu_config *vcpu_configs[];
-static int vcpu_configs_n;
-
-#define for_each_sublist(c, s)                                                 \
-       for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
-
-#define for_each_reg(i)                                                                \
-       for ((i) = 0; (i) < reg_list->n; ++(i))
-
-#define for_each_reg_filtered(i)                                               \
-       for_each_reg(i)                                                         \
-               if (!filter_reg(reg_list->reg[i]))
-
-#define for_each_missing_reg(i)                                                        \
-       for ((i) = 0; (i) < blessed_n; ++(i))                                   \
-               if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))      \
-                       if (check_supported_feat_reg(vcpu, blessed_reg[i]))
-
-#define for_each_new_reg(i)                                                    \
-       for_each_reg_filtered(i)                                                \
-               if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
-
-static const char *config_name(struct vcpu_config *c)
-{
-       struct reg_sublist *s;
-       int len = 0;
-
-       if (c->name)
-               return c->name;
-
-       for_each_sublist(c, s)
-               len += strlen(s->name) + 1;
-
-       c->name = malloc(len);
-
-       len = 0;
-       for_each_sublist(c, s) {
-               if (!strcmp(s->name, "base"))
-                       continue;
-               strcat(c->name + len, s->name);
-               len += strlen(s->name) + 1;
-               c->name[len - 1] = '+';
-       }
-       c->name[len - 1] = '\0';
-
-       return c->name;
-}
-
-static bool has_cap(struct vcpu_config *c, long capability)
-{
-       struct reg_sublist *s;
-
-       for_each_sublist(c, s)
-               if (s->capability == capability)
-                       return true;
-       return false;
-}
-
-static bool filter_reg(__u64 reg)
+bool filter_reg(__u64 reg)
 {
        /*
         * DEMUX register presence depends on the host's CLIDR_EL1.
@@ -151,16 +55,6 @@ static bool filter_reg(__u64 reg)
        return false;
 }
 
-static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
-{
-       int i;
-
-       for (i = 0; i < nr_regs; ++i)
-               if (reg == regs[i])
-                       return true;
-       return false;
-}
-
 static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
 {
        int i, ret;
@@ -180,17 +74,27 @@ static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
        return true;
 }
 
-static const char *str_with_index(const char *template, __u64 index)
+bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
 {
-       char *str, *p;
-       int n;
+       return check_supported_feat_reg(vcpu, reg);
+}
 
-       str = strdup(template);
-       p = strstr(str, "##");
-       n = sprintf(p, "%lld", index);
-       strcat(p + n, strstr(template, "##") + 2);
+/* Return true when @err equals EPERM.
+ * NOTE(review): presumably @err is the errno from a register set that was
+ * expected to be rejected -- confirm against the caller.
+ */
+bool check_reject_set(int err)
+{
+       return err == EPERM;
+}
 
-       return (const char *)str;
+/* Issue KVM_ARM_VCPU_FINALIZE on @vcpu for every sublist of @c that has
+ * its finalize flag set, passing that sublist's feature value.
+ */
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *s;
+       int feature;
+
+       for_each_sublist(c, s) {
+               if (s->finalize) {
+                       /* The ioctl takes a pointer, so copy into a local. */
+                       feature = s->feature;
+                       vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
+               }
+       }
 }
 
 #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
@@ -199,7 +103,7 @@ static const char *str_with_index(const char *template, __u64 index)
 #define CORE_SPSR_XX_NR_WORDS  2
 #define CORE_FPREGS_XX_NR_WORDS        4
 
-static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
+static const char *core_id_to_str(const char *prefix, __u64 id)
 {
        __u64 core_off = id & ~REG_MASK, idx;
 
@@ -210,8 +114,8 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
        case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
             KVM_REG_ARM_CORE_REG(regs.regs[30]):
                idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx);
-               return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
+               TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
        case KVM_REG_ARM_CORE_REG(regs.sp):
                return "KVM_REG_ARM_CORE_REG(regs.sp)";
        case KVM_REG_ARM_CORE_REG(regs.pc):
@@ -225,24 +129,24 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
        case KVM_REG_ARM_CORE_REG(spsr[0]) ...
             KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
                idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
-               TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx);
-               return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
+               TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
        case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
             KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
                idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx);
-               return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
+               TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
        case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
                return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
        case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
                return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
        }
 
-       TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id);
+       TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
        return NULL;
 }
 
-static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
+static const char *sve_id_to_str(const char *prefix, __u64 id)
 {
        __u64 sve_off, n, i;
 
@@ -252,37 +156,37 @@ static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
        sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
        i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
 
-       TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id);
+       TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
 
        switch (sve_off) {
        case KVM_REG_ARM64_SVE_ZREG_BASE ...
             KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
                n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
                TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
-                           "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id);
-               return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
+                           "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
+               return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
        case KVM_REG_ARM64_SVE_PREG_BASE ...
             KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
                n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
                TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
-                           "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id);
-               return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
+                           "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
+               return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
        case KVM_REG_ARM64_SVE_FFR_BASE:
                TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
-                           "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
                return "KVM_REG_ARM64_SVE_FFR(0)";
        }
 
        return NULL;
 }
 
-static void print_reg(struct vcpu_config *c, __u64 id)
+void print_reg(const char *prefix, __u64 id)
 {
        unsigned op0, op1, crn, crm, op2;
        const char *reg_size = NULL;
 
        TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
-                   "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id);
+                   "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
 
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U8:
@@ -314,16 +218,16 @@ static void print_reg(struct vcpu_config *c, __u64 id)
                break;
        default:
                TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
-                         config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+                         prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
        }
 
        switch (id & KVM_REG_ARM_COPROC_MASK) {
        case KVM_REG_ARM_CORE:
-               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id));
+               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
                break;
        case KVM_REG_ARM_DEMUX:
                TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
-                           "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
                printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
                       reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
                break;
@@ -334,370 +238,34 @@ static void print_reg(struct vcpu_config *c, __u64 id)
                crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
                op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
                TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
-                           "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
                printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
                break;
        case KVM_REG_ARM_FW:
                TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
-                           "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
                printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
                break;
        case KVM_REG_ARM_FW_FEAT_BMAP:
                TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
-                           "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
                printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
                break;
        case KVM_REG_ARM64_SVE:
-               if (has_cap(c, KVM_CAP_ARM_SVE))
-                       printf("\t%s,\n", sve_id_to_str(c, id));
-               else
-                       TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id);
+               printf("\t%s,\n", sve_id_to_str(prefix, id));
                break;
        default:
                TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
-                         config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
-       }
-}
-
-/*
- * Older kernels listed each 32-bit word of CORE registers separately.
- * For 64 and 128-bit registers we need to ignore the extra words. We
- * also need to fixup the sizes, because the older kernels stated all
- * registers were 64-bit, even when they weren't.
- */
-static void core_reg_fixup(void)
-{
-       struct kvm_reg_list *tmp;
-       __u64 id, core_off;
-       int i;
-
-       tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64));
-
-       for (i = 0; i < reg_list->n; ++i) {
-               id = reg_list->reg[i];
-
-               if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) {
-                       tmp->reg[tmp->n++] = id;
-                       continue;
-               }
-
-               core_off = id & ~REG_MASK;
-
-               switch (core_off) {
-               case 0x52: case 0xd2: case 0xd6:
-                       /*
-                        * These offsets are pointing at padding.
-                        * We need to ignore them too.
-                        */
-                       continue;
-               case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
-                    KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
-                       if (core_off & 3)
-                               continue;
-                       id &= ~KVM_REG_SIZE_MASK;
-                       id |= KVM_REG_SIZE_U128;
-                       tmp->reg[tmp->n++] = id;
-                       continue;
-               case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
-               case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
-                       id &= ~KVM_REG_SIZE_MASK;
-                       id |= KVM_REG_SIZE_U32;
-                       tmp->reg[tmp->n++] = id;
-                       continue;
-               default:
-                       if (core_off & 1)
-                               continue;
-                       tmp->reg[tmp->n++] = id;
-                       break;
-               }
+                         prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
        }
-
-       free(reg_list);
-       reg_list = tmp;
-}
-
-static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init)
-{
-       struct reg_sublist *s;
-
-       for_each_sublist(c, s)
-               if (s->capability)
-                       init->features[s->feature / 32] |= 1 << (s->feature % 32);
-}
-
-static void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_config *c)
-{
-       struct reg_sublist *s;
-       int feature;
-
-       for_each_sublist(c, s) {
-               if (s->finalize) {
-                       feature = s->feature;
-                       vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
-               }
-       }
-}
-
-static void check_supported(struct vcpu_config *c)
-{
-       struct reg_sublist *s;
-
-       for_each_sublist(c, s) {
-               if (!s->capability)
-                       continue;
-
-               __TEST_REQUIRE(kvm_has_cap(s->capability),
-                              "%s: %s not available, skipping tests\n",
-                              config_name(c), s->name);
-       }
-}
-
-static bool print_list;
-static bool print_filtered;
-static bool fixup_core_regs;
-
-static void run_test(struct vcpu_config *c)
-{
-       struct kvm_vcpu_init init = { .target = -1, };
-       int new_regs = 0, missing_regs = 0, i, n;
-       int failed_get = 0, failed_set = 0, failed_reject = 0;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct reg_sublist *s;
-
-       check_supported(c);
-
-       vm = vm_create_barebones();
-       prepare_vcpu_init(c, &init);
-       vcpu = __vm_vcpu_add(vm, 0);
-       aarch64_vcpu_setup(vcpu, &init);
-       finalize_vcpu(vcpu, c);
-
-       reg_list = vcpu_get_reg_list(vcpu);
-
-       if (fixup_core_regs)
-               core_reg_fixup();
-
-       if (print_list || print_filtered) {
-               putchar('\n');
-               for_each_reg(i) {
-                       __u64 id = reg_list->reg[i];
-                       if ((print_list && !filter_reg(id)) ||
-                           (print_filtered && filter_reg(id)))
-                               print_reg(c, id);
-               }
-               putchar('\n');
-               return;
-       }
-
-       /*
-        * We only test that we can get the register and then write back the
-        * same value. Some registers may allow other values to be written
-        * back, but others only allow some bits to be changed, and at least
-        * for ID registers set will fail if the value does not exactly match
-        * what was returned by get. If registers that allow other values to
-        * be written need to have the other values tested, then we should
-        * create a new set of tests for those in a new independent test
-        * executable.
-        */
-       for_each_reg(i) {
-               uint8_t addr[2048 / 8];
-               struct kvm_one_reg reg = {
-                       .id = reg_list->reg[i],
-                       .addr = (__u64)&addr,
-               };
-               bool reject_reg = false;
-               int ret;
-
-               ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr);
-               if (ret) {
-                       printf("%s: Failed to get ", config_name(c));
-                       print_reg(c, reg.id);
-                       putchar('\n');
-                       ++failed_get;
-               }
-
-               /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
-               for_each_sublist(c, s) {
-                       if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
-                               reject_reg = true;
-                               ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
-                               if (ret != -1 || errno != EPERM) {
-                                       printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
-                                       print_reg(c, reg.id);
-                                       putchar('\n');
-                                       ++failed_reject;
-                               }
-                               break;
-                       }
-               }
-
-               if (!reject_reg) {
-                       ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
-                       if (ret) {
-                               printf("%s: Failed to set ", config_name(c));
-                               print_reg(c, reg.id);
-                               putchar('\n');
-                               ++failed_set;
-                       }
-               }
-       }
-
-       for_each_sublist(c, s)
-               blessed_n += s->regs_n;
-       blessed_reg = calloc(blessed_n, sizeof(__u64));
-
-       n = 0;
-       for_each_sublist(c, s) {
-               for (i = 0; i < s->regs_n; ++i)
-                       blessed_reg[n++] = s->regs[i];
-       }
-
-       for_each_new_reg(i)
-               ++new_regs;
-
-       for_each_missing_reg(i)
-               ++missing_regs;
-
-       if (new_regs || missing_regs) {
-               n = 0;
-               for_each_reg_filtered(i)
-                       ++n;
-
-               printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
-               printf("%s: Number registers:         %5lld (includes %lld filtered registers)\n",
-                      config_name(c), reg_list->n, reg_list->n - n);
-       }
-
-       if (new_regs) {
-               printf("\n%s: There are %d new registers.\n"
-                      "Consider adding them to the blessed reg "
-                      "list with the following lines:\n\n", config_name(c), new_regs);
-               for_each_new_reg(i)
-                       print_reg(c, reg_list->reg[i]);
-               putchar('\n');
-       }
-
-       if (missing_regs) {
-               printf("\n%s: There are %d missing registers.\n"
-                      "The following lines are missing registers:\n\n", config_name(c), missing_regs);
-               for_each_missing_reg(i)
-                       print_reg(c, blessed_reg[i]);
-               putchar('\n');
-       }
-
-       TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
-                   "%s: There are %d missing registers; "
-                   "%d registers failed get; %d registers failed set; %d registers failed reject",
-                   config_name(c), missing_regs, failed_get, failed_set, failed_reject);
-
-       pr_info("%s: PASS\n", config_name(c));
-       blessed_n = 0;
-       free(blessed_reg);
-       free(reg_list);
-       kvm_vm_free(vm);
-}
-
-static void help(void)
-{
-       struct vcpu_config *c;
-       int i;
-
-       printf(
-       "\n"
-       "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n"
-       " --config=<selection>        Used to select a specific vcpu configuration for the test/listing\n"
-       "                             '<selection>' may be\n");
-
-       for (i = 0; i < vcpu_configs_n; ++i) {
-               c = vcpu_configs[i];
-               printf(
-       "                               '%s'\n", config_name(c));
-       }
-
-       printf(
-       "\n"
-       " --list                      Print the register list rather than test it (requires --config)\n"
-       " --list-filtered             Print registers that would normally be filtered out (requires --config)\n"
-       " --core-reg-fixup            Needed when running on old kernels with broken core reg listings\n"
-       "\n"
-       );
-}
-
-static struct vcpu_config *parse_config(const char *config)
-{
-       struct vcpu_config *c;
-       int i;
-
-       if (config[8] != '=')
-               help(), exit(1);
-
-       for (i = 0; i < vcpu_configs_n; ++i) {
-               c = vcpu_configs[i];
-               if (strcmp(config_name(c), &config[9]) == 0)
-                       break;
-       }
-
-       if (i == vcpu_configs_n)
-               help(), exit(1);
-
-       return c;
-}
-
-int main(int ac, char **av)
-{
-       struct vcpu_config *c, *sel = NULL;
-       int i, ret = 0;
-       pid_t pid;
-
-       for (i = 1; i < ac; ++i) {
-               if (strcmp(av[i], "--core-reg-fixup") == 0)
-                       fixup_core_regs = true;
-               else if (strncmp(av[i], "--config", 8) == 0)
-                       sel = parse_config(av[i]);
-               else if (strcmp(av[i], "--list") == 0)
-                       print_list = true;
-               else if (strcmp(av[i], "--list-filtered") == 0)
-                       print_filtered = true;
-               else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0)
-                       help(), exit(0);
-               else
-                       help(), exit(1);
-       }
-
-       if (print_list || print_filtered) {
-               /*
-                * We only want to print the register list of a single config.
-                */
-               if (!sel)
-                       help(), exit(1);
-       }
-
-       for (i = 0; i < vcpu_configs_n; ++i) {
-               c = vcpu_configs[i];
-               if (sel && c != sel)
-                       continue;
-
-               pid = fork();
-
-               if (!pid) {
-                       run_test(c);
-                       exit(0);
-               } else {
-                       int wstatus;
-                       pid_t wpid = wait(&wstatus);
-                       TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
-                       if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
-                               ret = KSFT_FAIL;
-               }
-       }
-
-       return ret;
 }
 
 /*
- * The current blessed list was primed with the output of kernel version
+ * The original blessed list was primed with the output of kernel version
  * v4.15 with --core-reg-fixup and then later updated with new registers.
+ * (The --core-reg-fixup option and its fixup function have been removed
+ * from the test, as this type of test is unlikely to be run on a kernel
+ * older than v5.2.)
  *
  * The blessed list is up to date with kernel version v6.4 (or so we hope)
  */
@@ -1130,14 +698,14 @@ static __u64 pauth_generic_regs[] = {
                .regs_n         = ARRAY_SIZE(pauth_generic_regs),       \
        }
 
-static struct vcpu_config vregs_config = {
+static struct vcpu_reg_list vregs_config = {
        .sublists = {
        BASE_SUBLIST,
        VREGS_SUBLIST,
        {0},
        },
 };
-static struct vcpu_config vregs_pmu_config = {
+static struct vcpu_reg_list vregs_pmu_config = {
        .sublists = {
        BASE_SUBLIST,
        VREGS_SUBLIST,
@@ -1145,14 +713,14 @@ static struct vcpu_config vregs_pmu_config = {
        {0},
        },
 };
-static struct vcpu_config sve_config = {
+static struct vcpu_reg_list sve_config = {
        .sublists = {
        BASE_SUBLIST,
        SVE_SUBLIST,
        {0},
        },
 };
-static struct vcpu_config sve_pmu_config = {
+static struct vcpu_reg_list sve_pmu_config = {
        .sublists = {
        BASE_SUBLIST,
        SVE_SUBLIST,
@@ -1160,7 +728,7 @@ static struct vcpu_config sve_pmu_config = {
        {0},
        },
 };
-static struct vcpu_config pauth_config = {
+static struct vcpu_reg_list pauth_config = {
        .sublists = {
        BASE_SUBLIST,
        VREGS_SUBLIST,
@@ -1168,7 +736,7 @@ static struct vcpu_config pauth_config = {
        {0},
        },
 };
-static struct vcpu_config pauth_pmu_config = {
+static struct vcpu_reg_list pauth_pmu_config = {
        .sublists = {
        BASE_SUBLIST,
        VREGS_SUBLIST,
@@ -1178,7 +746,7 @@ static struct vcpu_config pauth_pmu_config = {
        },
 };
 
-static struct vcpu_config *vcpu_configs[] = {
+struct vcpu_reg_list *vcpu_configs[] = {
        &vregs_config,
        &vregs_pmu_config,
        &sve_config,
@@ -1186,4 +754,4 @@ static struct vcpu_config *vcpu_configs[] = {
        &pauth_config,
        &pauth_pmu_config,
 };
-static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
index bef1499..31f66ba 100644 (file)
@@ -8,7 +8,6 @@
  * hypercalls are properly masked or unmasked to the guest when disabled or
  * enabled from the KVM userspace, respectively.
  */
-
 #include <errno.h>
 #include <linux/arm-smccc.h>
 #include <asm/kvm.h>
@@ -105,15 +104,17 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info)
                switch (stage) {
                case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
                case TEST_STAGE_HVC_IFACE_FALSE_INFO:
-                       GUEST_ASSERT_3(res.a0 == SMCCC_RET_NOT_SUPPORTED,
-                                       res.a0, hc_info->func_id, hc_info->arg1);
+                       __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
                        break;
                case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
-                       GUEST_ASSERT_3(res.a0 != SMCCC_RET_NOT_SUPPORTED,
-                                       res.a0, hc_info->func_id, hc_info->arg1);
+                       __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
                        break;
                default:
-                       GUEST_ASSERT_1(0, stage);
+                       GUEST_FAIL("Unexpected stage = %u", stage);
                }
        }
 }
@@ -132,7 +133,7 @@ static void guest_code(void)
                        guest_test_hvc(false_hvc_info);
                        break;
                default:
-                       GUEST_ASSERT_1(0, stage);
+                       GUEST_FAIL("Unexpected stage = %u", stage);
                }
 
                GUEST_SYNC(stage);
@@ -290,10 +291,7 @@ static void test_run(void)
                        guest_done = true;
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_N(uc, "values: 0x%lx, 0x%lx; 0x%lx, stage: %u",
-                                             GUEST_ASSERT_ARG(uc, 0),
-                                             GUEST_ASSERT_ARG(uc, 1),
-                                             GUEST_ASSERT_ARG(uc, 2), stage);
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                default:
                        TEST_FAIL("Unexpected guest exit\n");
index df10f1f..47bb914 100644 (file)
@@ -7,7 +7,6 @@
  * hugetlbfs with a hole). It checks that the expected handling method is
  * called (e.g., uffd faults with the right address and write/read flag).
  */
-
 #define _GNU_SOURCE
 #include <linux/bitmap.h>
 #include <fcntl.h>
@@ -293,12 +292,12 @@ static void guest_code(struct test_desc *test)
 
 static void no_dabt_handler(struct ex_regs *regs)
 {
-       GUEST_ASSERT_1(false, read_sysreg(far_el1));
+       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%llx", read_sysreg(far_el1));
 }
 
 static void no_iabt_handler(struct ex_regs *regs)
 {
-       GUEST_ASSERT_1(false, regs->pc);
+       GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
 }
 
 static struct uffd_args {
@@ -318,7 +317,7 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
 
        TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
                    "The only expected UFFD mode is MISSING");
-       ASSERT_EQ(addr, (uint64_t)args->hva);
+       TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
 
        pr_debug("uffd fault: addr=%p write=%d\n",
                 (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
@@ -432,7 +431,7 @@ static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
        region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
        hva = (void *)region->region.userspace_addr;
 
-       ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+       TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
 
        memcpy(hva, run->mmio.data, run->mmio.len);
        events.mmio_exits += 1;
@@ -631,9 +630,9 @@ static void setup_default_handlers(struct test_desc *test)
 
 static void check_event_counts(struct test_desc *test)
 {
-       ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
-       ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
-       ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
+       TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+       TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+       TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
 }
 
 static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
@@ -679,7 +678,7 @@ static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
                        }
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
index 90d854e..2e64b48 100644 (file)
@@ -7,7 +7,6 @@
  * host to inject a specific intid via a GUEST_SYNC call, and then checks that
  * it received it.
  */
-
 #include <asm/kvm.h>
 #include <asm/kvm_para.h>
 #include <sys/eventfd.h>
@@ -781,7 +780,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
                        run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c
new file mode 100644 (file)
index 0000000..be7bf52
--- /dev/null
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * When attempting to migrate from a host with an older kernel to a host
+ * with a newer kernel we allow the newer kernel on the destination to
+ * list new registers with get-reg-list. We assume they'll be unused, at
+ * least until the guest reboots, and so they're relatively harmless.
+ * However, if the destination host with the newer kernel is missing
+ * registers which the source host with the older kernel has, then that's
+ * a regression in get-reg-list. This test checks for that regression by
+ * checking the current list against a blessed list. We should never have
+ * missing registers, but if new ones appear then they can probably be
+ * added to the blessed list. A completely new blessed list can be created
+ * by running the test with the --list command line argument.
+ *
+ * The blessed list should be created from the oldest possible kernel.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
+
+extern struct vcpu_reg_list *vcpu_configs[];
+extern int vcpu_configs_n;
+
+#define for_each_reg(i)                                                                \
+       for ((i) = 0; (i) < reg_list->n; ++(i))
+
+#define for_each_reg_filtered(i)                                               \
+       for_each_reg(i)                                                         \
+               if (!filter_reg(reg_list->reg[i]))
+
+#define for_each_missing_reg(i)                                                        \
+       for ((i) = 0; (i) < blessed_n; ++(i))                                   \
+               if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))      \
+                       if (check_supported_reg(vcpu, blessed_reg[i]))
+
+#define for_each_new_reg(i)                                                    \
+       for_each_reg_filtered(i)                                                \
+               if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+#define for_each_present_blessed_reg(i)                                                \
+       for_each_reg(i)                                                         \
+               if (find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+/*
+ * Return the name of config @c, building and caching it in c->name on first
+ * use. The name is the names of all sublists other than "base", joined
+ * with '+'. If there is no such sublist the name is the empty string.
+ */
+static const char *config_name(struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *s;
+       int len = 1;    /* NUL terminator, needed even if every sublist is skipped */
+
+       if (c->name)
+               return c->name;
+
+       /* Upper bound: every sublist name plus one separator each. */
+       for_each_sublist(c, s)
+               len += strlen(s->name) + 1;
+
+       c->name = malloc(len);
+       TEST_ASSERT(c->name, "Failed to allocate %d bytes for config name", len);
+
+       len = 0;
+       for_each_sublist(c, s) {
+               if (!strcmp(s->name, "base"))
+                       continue;
+               if (len)
+                       c->name[len++] = '+';
+               /* strcpy(), not strcat(): malloc()ed memory holds no string to append to */
+               strcpy(c->name + len, s->name);
+               len += strlen(s->name);
+       }
+       c->name[len] = '\0';
+
+       return c->name;
+}
+
+bool __weak check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+       return true;
+}
+
+bool __weak filter_reg(__u64 reg)
+{
+       return false;
+}
+
+/*
+ * Return true if @reg equals one of the first @nr_regs entries of @regs,
+ * false otherwise (including when @nr_regs is 0).
+ */
+static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+{
+       __u64 i;        /* same type as nr_regs: no signed/unsigned comparison */
+
+       for (i = 0; i < nr_regs; ++i)
+               if (reg == regs[i])
+                       return true;
+       return false;
+}
+
+void __weak print_reg(const char *prefix, __u64 id)
+{
+       printf("\t0x%llx,\n", id);
+}
+
+bool __weak check_reject_set(int err)
+{
+       return true;
+}
+
+void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+}
+
+#ifdef __aarch64__
+/*
+ * For every sublist of @c that names a capability, set that sublist's
+ * feature bit in @init->features. Sublists without a capability are
+ * ignored.
+ */
+static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init)
+{
+       struct vcpu_reg_sublist *s;
+
+       for_each_sublist(c, s) {
+               if (!s->capability)
+                       continue;
+               /* 1U: left-shifting a signed 1 into bit 31 is undefined behavior */
+               init->features[s->feature / 32] |= 1U << (s->feature % 32);
+       }
+}
+
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+       struct kvm_vcpu_init init = { .target = -1, };
+       struct kvm_vcpu *vcpu;
+
+       prepare_vcpu_init(c, &init);
+       vcpu = __vm_vcpu_add(vm, 0);
+       aarch64_vcpu_setup(vcpu, &init);
+
+       return vcpu;
+}
+#else
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+       return __vm_vcpu_add(vm, 0);
+}
+#endif
+
+static void check_supported(struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *s;
+
+       for_each_sublist(c, s) {
+               if (!s->capability)
+                       continue;
+
+               __TEST_REQUIRE(kvm_has_cap(s->capability),
+                              "%s: %s not available, skipping tests\n",
+                              config_name(c), s->name);
+       }
+}
+
+static bool print_list;
+static bool print_filtered;
+
+static void run_test(struct vcpu_reg_list *c)
+{
+       int new_regs = 0, missing_regs = 0, i, n;
+       int failed_get = 0, failed_set = 0, failed_reject = 0;
+       int skipped_set = 0;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct vcpu_reg_sublist *s;
+
+       check_supported(c);
+
+       vm = vm_create_barebones();
+       vcpu = vcpu_config_get_vcpu(c, vm);
+       finalize_vcpu(vcpu, c);
+
+       reg_list = vcpu_get_reg_list(vcpu);
+
+       if (print_list || print_filtered) {
+               putchar('\n');
+               for_each_reg(i) {
+                       __u64 id = reg_list->reg[i];
+                       if ((print_list && !filter_reg(id)) ||
+                           (print_filtered && filter_reg(id)))
+                               print_reg(config_name(c), id);
+               }
+               putchar('\n');
+               return;
+       }
+
+       for_each_sublist(c, s)
+               blessed_n += s->regs_n;
+       blessed_reg = calloc(blessed_n, sizeof(__u64));
+
+       n = 0;
+       for_each_sublist(c, s) {
+               for (i = 0; i < s->regs_n; ++i)
+                       blessed_reg[n++] = s->regs[i];
+       }
+
+       /*
+        * We only test that we can get the register and then write back the
+        * same value. Some registers may allow other values to be written
+        * back, but others only allow some bits to be changed, and at least
+        * for ID registers set will fail if the value does not exactly match
+        * what was returned by get. If registers that allow other values to
+        * be written need to have the other values tested, then we should
+        * create a new set of tests for those in a new independent test
+        * executable.
+        *
+        * Only do the get/set tests on present, blessed list registers,
+        * since we don't know the capabilities of any new registers.
+        */
+       for_each_present_blessed_reg(i) {
+               uint8_t addr[2048 / 8];
+               struct kvm_one_reg reg = {
+                       .id = reg_list->reg[i],
+                       .addr = (__u64)&addr,
+               };
+               bool reject_reg = false, skip_reg = false;
+               int ret;
+
+               ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr);
+               if (ret) {
+                       printf("%s: Failed to get ", config_name(c));
+                       print_reg(config_name(c), reg.id);
+                       putchar('\n');
+                       ++failed_get;
+               }
+
+               for_each_sublist(c, s) {
+                       /* rejects_set registers are rejected for set operation */
+                       if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+                               reject_reg = true;
+                               ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+                               if (ret != -1 || !check_reject_set(errno)) {
+                                       printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+                                       print_reg(config_name(c), reg.id);
+                                       putchar('\n');
+                                       ++failed_reject;
+                               }
+                               break;
+                       }
+
+                       /* skips_set registers are skipped for set operation */
+                       if (s->skips_set && find_reg(s->skips_set, s->skips_set_n, reg.id)) {
+                               skip_reg = true;
+                               ++skipped_set;
+                               break;
+                       }
+               }
+
+               if (!reject_reg && !skip_reg) {
+                       ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+                       if (ret) {
+                               printf("%s: Failed to set ", config_name(c));
+                               print_reg(config_name(c), reg.id);
+                               putchar('\n');
+                               ++failed_set;
+                       }
+               }
+       }
+
+       for_each_new_reg(i)
+               ++new_regs;
+
+       for_each_missing_reg(i)
+               ++missing_regs;
+
+       if (new_regs || missing_regs) {
+               n = 0;
+               for_each_reg_filtered(i)
+                       ++n;
+
+               printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+               printf("%s: Number registers:         %5lld (includes %lld filtered registers)\n",
+                      config_name(c), reg_list->n, reg_list->n - n);
+       }
+
+       if (new_regs) {
+               printf("\n%s: There are %d new registers.\n"
+                      "Consider adding them to the blessed reg "
+                      "list with the following lines:\n\n", config_name(c), new_regs);
+               for_each_new_reg(i)
+                       print_reg(config_name(c), reg_list->reg[i]);
+               putchar('\n');
+       }
+
+       if (missing_regs) {
+               printf("\n%s: There are %d missing registers.\n"
+                      "The following lines are missing registers:\n\n", config_name(c), missing_regs);
+               for_each_missing_reg(i)
+                       print_reg(config_name(c), blessed_reg[i]);
+               putchar('\n');
+       }
+
+       TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
+                   "%s: There are %d missing registers; %d registers failed get; "
+                   "%d registers failed set; %d registers failed reject; %d registers skipped set",
+                   config_name(c), missing_regs, failed_get, failed_set, failed_reject, skipped_set);
+
+       pr_info("%s: PASS\n", config_name(c));
+       blessed_n = 0;
+       free(blessed_reg);
+       free(reg_list);
+       kvm_vm_free(vm);
+}
+
+/*
+ * Print the usage text to stdout.  The name of every entry in
+ * vcpu_configs[] is listed as a valid '--config' selection.
+ */
+static void help(void)
+{
+       int idx = 0;
+
+       printf("\n"
+              "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered]\n\n"
+              " --config=<selection>        Used to select a specific vcpu configuration for the test/listing\n"
+              "                             '<selection>' may be\n");
+
+       /* One line per known config name. */
+       while (idx < vcpu_configs_n) {
+               printf("                               '%s'\n",
+                      config_name(vcpu_configs[idx]));
+               ++idx;
+       }
+
+       printf("\n"
+              " --list                      Print the register list rather than test it (requires --config)\n"
+              " --list-filtered             Print registers that would normally be filtered out (requires --config)\n"
+              "\n");
+}
+
+/*
+ * Resolve a "--config=<name>" argument to the vcpu_configs[] entry whose
+ * config_name() equals <name>.
+ *
+ * The name is expected to start at offset 9, right after the '=' at
+ * offset 8.  Prints the usage text and exits with status 1 when the '='
+ * is missing or when no config has the requested name.
+ */
+static struct vcpu_reg_list *parse_config(const char *config)
+{
+       const char *wanted;
+       int idx;
+
+       if (config[8] != '=') {
+               help();
+               exit(1);
+       }
+
+       wanted = &config[9];
+
+       for (idx = 0; idx < vcpu_configs_n; ++idx) {
+               struct vcpu_reg_list *candidate = vcpu_configs[idx];
+
+               if (!strcmp(config_name(candidate), wanted))
+                       return candidate;
+       }
+
+       /* Nothing matched the requested name. */
+       help();
+       exit(1);
+}
+
+/*
+ * Parse the command line, then run the test (or the listing) once per
+ * selected vcpu config, each in its own forked child.
+ *
+ * Options:
+ *   --config=<name>   restrict the run to a single config
+ *   --list            print the register list (requires --config)
+ *   --list-filtered   print the filtered registers (requires --config)
+ *   --help, -h        print usage and exit 0
+ * Any other argument prints usage and exits 1.
+ *
+ * Returns 0 when every child exited with status 0 or KSFT_SKIP, and
+ * KSFT_FAIL when at least one child exited with any other status.
+ */
+int main(int ac, char **av)
+{
+       struct vcpu_reg_list *c, *sel = NULL;
+       int i, ret = 0;
+       pid_t pid;
+
+       for (i = 1; i < ac; ++i) {
+               if (strncmp(av[i], "--config", 8) == 0)
+                       sel = parse_config(av[i]);
+               else if (strcmp(av[i], "--list") == 0)
+                       print_list = true;
+               else if (strcmp(av[i], "--list-filtered") == 0)
+                       print_filtered = true;
+               /* Compare the current argument, not av[1], for "-h" too. */
+               else if (strcmp(av[i], "--help") == 0 || strcmp(av[i], "-h") == 0)
+                       help(), exit(0);
+               else
+                       help(), exit(1);
+       }
+
+       if (print_list || print_filtered) {
+               /*
+                * We only want to print the register list of a single config.
+                */
+               if (!sel)
+                       help(), exit(1);
+       }
+
+       for (i = 0; i < vcpu_configs_n; ++i) {
+               c = vcpu_configs[i];
+               if (sel && c != sel)
+                       continue;
+
+               pid = fork();
+
+               /*
+                * A failed fork() returns -1.  Without this check the parent
+                * branch would run, wait() would also return -1 (matching
+                * 'pid') and an uninitialized 'wstatus' would be inspected.
+                */
+               TEST_ASSERT(pid >= 0, "fork: Unexpected failure");
+
+               if (!pid) {
+                       /* Child: run this config and report via exit status. */
+                       run_test(c);
+                       exit(0);
+               } else {
+                       int wstatus;
+                       pid_t wpid = wait(&wstatus);
+                       TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+                       /* A skipped config is not a failure. */
+                       if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+                               ret = KSFT_FAIL;
+               }
+       }
+
+       return ret;
+}
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
new file mode 100644 (file)
index 0000000..41230b7
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test for GUEST_PRINTF
+ *
+ * Copyright 2022, Google, Inc. and/or its affiliates.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Arguments handed to the guest for one test iteration: the two values
+ * to print and compare (stored as uint64_t whatever their original type)
+ * and the 'enum args_type' selector the guest switches on.
+ */
+struct guest_vals {
+       uint64_t a;
+       uint64_t b;
+       uint64_t type;
+};
+
+/* Filled in by the per-type host helpers and copied to the guest with sync_global_to_guest(). */
+static struct guest_vals vals;
+
+/*
+ * X-macro table of the argument types under test.  Each row is
+ * TYPE(fn, ext, fmt_t, T):
+ *   fn    - name of the generated host helper function
+ *   ext   - suffix used for TYPE_<ext>, PRINTF_FMT_<ext> and ASSERT_FMT_<ext>
+ *   fmt_t - printf conversion used to format one value of this type
+ *   T     - C type of the helper's 'a' and 'b' parameters
+ */
+/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */
+#define TYPE_LIST                                      \
+TYPE(test_type_i64,  I64,  "%ld",   int64_t)           \
+TYPE(test_type_u64,  U64u, "%lu",   uint64_t)          \
+TYPE(test_type_x64,  U64x, "0x%lx", uint64_t)          \
+TYPE(test_type_X64,  U64X, "0x%lX", uint64_t)          \
+TYPE(test_type_u32,  U32u, "%u",    uint32_t)          \
+TYPE(test_type_x32,  U32x, "0x%x",  uint32_t)          \
+TYPE(test_type_X32,  U32X, "0x%X",  uint32_t)          \
+TYPE(test_type_int,  INT,  "%d",    int)               \
+TYPE(test_type_char, CHAR, "%c",    char)              \
+TYPE(test_type_str,  STR,  "'%s'",  const char *)      \
+TYPE(test_type_ptr,  PTR,  "%p",    uintptr_t)
+
+/* One TYPE_<ext> enumerator per TYPE_LIST row, in table order. */
+enum args_type {
+#define TYPE(fn, ext, fmt_t, T) TYPE_##ext,
+       TYPE_LIST
+#undef TYPE
+};
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+                    const char *expected_assert);
+
+/*
+ * For one TYPE_LIST row, define:
+ *   - PRINTF_FMT_<ext> and ASSERT_FMT_<ext>, the format strings used for
+ *     that type's GUEST_PRINTF() and __GUEST_ASSERT() in guest_code();
+ *   - the host helper fn(vcpu, a, b), which formats the expected printf
+ *     and assert strings on the host with snprintf(), stores 'a', 'b' and
+ *     TYPE_<ext> in 'vals', syncs 'vals' to the guest and calls
+ *     run_test() to compare the guest's output with those strings.
+ */
+#define BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T)                    \
+const char *PRINTF_FMT_##ext = "Got params a = " fmt_t " and b = " fmt_t;    \
+const char *ASSERT_FMT_##ext = "Expected " fmt_t ", got " fmt_t " instead";  \
+static void fn(struct kvm_vcpu *vcpu, T a, T b)                                     \
+{                                                                           \
+       char expected_printf[UCALL_BUFFER_LEN];                              \
+       char expected_assert[UCALL_BUFFER_LEN];                              \
+                                                                            \
+       snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \
+       snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \
+       vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext };  \
+       sync_global_to_guest(vcpu->vm, vals);                                \
+       run_test(vcpu, expected_printf, expected_assert);                    \
+}
+
+/* Instantiate the format strings and the helper for every TYPE_LIST row. */
+#define TYPE(fn, ext, fmt_t, T) \
+               BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T)
+       TYPE_LIST
+#undef TYPE
+
+/*
+ * Guest entry point.  Each loop iteration handles the request currently
+ * stored in 'vals': for a known type it emits a GUEST_PRINTF() of 'a' and
+ * 'b' with that type's format, then asserts a == b using that type's
+ * assert format.  Every iteration ends with GUEST_DONE().
+ */
+static void guest_code(void)
+{
+       while (1) {
+               switch (vals.type) {
+#define TYPE(fn, ext, fmt_t, T)                                                        \
+               case TYPE_##ext:                                                \
+                       GUEST_PRINTF(PRINTF_FMT_##ext, vals.a, vals.b);         \
+                       __GUEST_ASSERT(vals.a == vals.b,                        \
+                                      ASSERT_FMT_##ext, vals.a, vals.b);       \
+                       break;
+               TYPE_LIST
+#undef TYPE
+               default:
+                       /* Unknown type: report the value; the host's run_test() fails on UCALL_SYNC. */
+                       GUEST_SYNC(vals.type);
+               }
+
+               GUEST_DONE();
+       }
+}
+
+/*
+ * Check that the guest assert message 'assert_msg' ends with
+ * 'expected_assert_msg'; fail the test otherwise.
+ *
+ * Unfortunately this gets a little messy because 'assert_msg' doesn't
+ * just contain the matching string, it also contains additional assert
+ * info.  Fortunately the part that matches should be at the very end of
+ * 'assert_msg'.
+ */
+static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
+{
+       size_t len_str = strlen(assert_msg);
+       size_t len_substr = strlen(expected_assert_msg);
+       size_t offset;
+
+       /* The expected text is the substring, the full message the haystack. */
+       TEST_ASSERT(len_substr <= len_str,
+                   "Expected '%s' to be a substring of '%s'\n",
+                   expected_assert_msg, assert_msg);
+
+       /* Only derive the tail offset once it is known to be in bounds. */
+       offset = len_str - len_substr;
+
+       TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0,
+                   "Unexpected mismatch. Expected: '%s', got: '%s'",
+                   expected_assert_msg, &assert_msg[offset]);
+}
+
+/*
+ * Run the vCPU until the guest signals UCALL_DONE, checking each ucall on
+ * the way: UCALL_PRINTF output must equal 'expected_printf', a UCALL_ABORT
+ * message must end with 'expected_assert', and any other ucall fails the
+ * test.
+ */
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+                    const char *expected_assert)
+{
+       struct kvm_run *run = vcpu->run;
+       struct ucall uc;
+       uint64_t cmd;
+
+       for (;;) {
+               vcpu_run(vcpu);
+
+               TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+                           "Unexpected exit reason: %u (%s),\n",
+                           run->exit_reason, exit_reason_str(run->exit_reason));
+
+               cmd = get_ucall(vcpu, &uc);
+               if (cmd == UCALL_DONE)
+                       return;
+
+               if (cmd == UCALL_PRINTF) {
+                       TEST_ASSERT(strcmp(uc.buffer, expected_printf) == 0,
+                                   "Unexpected mismatch. Expected: '%s', got: '%s'",
+                                   expected_printf, uc.buffer);
+               } else if (cmd == UCALL_ABORT) {
+                       ucall_abort(uc.buffer, expected_assert);
+               } else if (cmd == UCALL_SYNC) {
+                       /* The guest only syncs when it does not know vals.type. */
+                       TEST_FAIL("Unknown 'args_type' = %lu", uc.args[1]);
+               } else {
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+}
+
+/*
+ * Guest entry point for test_limits(): print a NUL-terminated string of
+ * 'a' characters held in a buffer 10 bytes larger than UCALL_BUFFER_LEN.
+ */
+static void guest_code_limits(void)
+{
+       char test_str[UCALL_BUFFER_LEN + 10];
+       const size_t last = sizeof(test_str) - 1;
+
+       /* Fill everything but the final byte, which holds the terminator. */
+       memset(test_str, 'a', last);
+       test_str[last] = 0;
+
+       GUEST_PRINTF("%s", test_str);
+}
+
+/*
+ * Create a fresh single-vCPU VM running guest_code_limits(), run it once
+ * and require that the resulting ucall is UCALL_ABORT, i.e. that printing
+ * a string longer than UCALL_BUFFER_LEN is reported as a guest failure.
+ */
+static void test_limits(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits);
+       run = vcpu->run;
+       vcpu_run(vcpu);
+
+       /* The guest must have exited through the ucall mechanism. */
+       TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+                   "Unexpected exit reason: %u (%s),\n",
+                   run->exit_reason, exit_reason_str(run->exit_reason));
+
+       TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT,
+                   "Unexpected ucall command: %lu,  Expected: %u (UCALL_ABORT)\n",
+                   uc.cmd, UCALL_ABORT);
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Exercise every TYPE_LIST helper on one shared VM, using both equal and
+ * unequal (a, b) pairs, then run the buffer-limit test on a separate VM.
+ * Always returns 0; failures are reported through the test asserts.
+ */
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       /* 64-bit signed and unsigned formats. */
+       test_type_i64(vcpu, -1, -1);
+       test_type_i64(vcpu, -1,  1);
+       test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+       test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+       test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+       test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+       /* 32-bit unsigned formats. */
+       test_type_u32(vcpu, 0x90abcdef, 0x90abcdef);
+       test_type_u32(vcpu, 0x90abcdef, 0x90abcdee);
+       test_type_x32(vcpu, 0x90abcdef, 0x90abcdef);
+       test_type_x32(vcpu, 0x90abcdef, 0x90abcdee);
+       test_type_X32(vcpu, 0x90abcdef, 0x90abcdef);
+       test_type_X32(vcpu, 0x90abcdef, 0x90abcdee);
+
+       test_type_int(vcpu, -1, -1);
+       test_type_int(vcpu, -1,  1);
+       test_type_int(vcpu,  1,  1);
+
+       test_type_char(vcpu, 'a', 'a');
+       test_type_char(vcpu, 'a', 'A');
+       test_type_char(vcpu, 'a', 'b');
+
+       test_type_str(vcpu, "foo", "foo");
+       test_type_str(vcpu, "foo", "bar");
+
+       test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+       kvm_vm_free(vm);
+
+       /* Uses its own VM, so it runs after the shared one is freed. */
+       test_limits();
+
+       return 0;
+}
index cb7c03d..b3e9752 100644 (file)
@@ -41,7 +41,7 @@ static inline uint64_t timer_get_cntct(enum arch_timer timer)
        case PHYSICAL:
                return read_sysreg(cntpct_el0);
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        /* We should not reach here */
@@ -58,7 +58,7 @@ static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
                write_sysreg(cval, cntp_cval_el0);
                break;
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        isb();
@@ -72,7 +72,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer)
        case PHYSICAL:
                return read_sysreg(cntp_cval_el0);
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        /* We should not reach here */
@@ -89,7 +89,7 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
                write_sysreg(tval, cntp_tval_el0);
                break;
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        isb();
@@ -105,7 +105,7 @@ static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
                write_sysreg(ctl, cntp_ctl_el0);
                break;
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        isb();
@@ -119,7 +119,7 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer)
        case PHYSICAL:
                return read_sysreg(cntp_ctl_el0);
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        /* We should not reach here */
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h
new file mode 100644 (file)
index 0000000..4b68f37
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+/*
+ * Issue a ucall by storing 'uc' to the location ucall_exit_mmio_addr
+ * points at.
+ * NOTE(review): presumably that store is what produces the KVM_EXIT_MMIO
+ * exit named by UCALL_EXIT_REASON; confirm against ucall_arch_init().
+ */
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
index eb1ff59..a18db6a 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/kvm.h>
 #include "linux/rbtree.h"
+#include <linux/types.h>
 
 #include <asm/atomic.h>
 
@@ -124,6 +125,26 @@ struct kvm_vm {
        uint32_t memslots[NR_MEM_REGIONS];
 };
 
+/*
+ * One group of registers belonging to a vcpu config.  Each register array
+ * is paired with its element count in the matching '_n' field.
+ */
+struct vcpu_reg_sublist {
+       const char *name;
+       /* NOTE(review): capability/feature/finalize are not used in this hunk; meaning to be confirmed at the users. */
+       long capability;
+       int feature;
+       bool finalize;
+       /* Register ids of this sublist; a NULL 'regs' ends for_each_sublist(). */
+       __u64 *regs;
+       __u64 regs_n;
+       /* Register ids for which a set operation is expected to be rejected. */
+       __u64 *rejects_set;
+       __u64 rejects_set_n;
+       /* Register ids for which the set operation is skipped. */
+       __u64 *skips_set;
+       __u64 skips_set_n;
+};
+
+/*
+ * A named vcpu config made of a flexible array of sublists.
+ * for_each_sublist() stops at the first sublist whose 'regs' is NULL, so
+ * the array must end with such an entry.
+ */
+struct vcpu_reg_list {
+       char *name;
+       struct vcpu_reg_sublist sublists[];
+};
+
+/*
+ * Iterate 's' over the sublists of vcpu config 'c', starting at
+ * sublists[0] and stopping at the first entry whose 'regs' is NULL.
+ */
+#define for_each_sublist(c, s)         \
+       for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
 
 #define kvm_for_each_vcpu(vm, i, vcpu)                 \
        for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
index d00d213..5b62a3d 100644 (file)
@@ -38,6 +38,9 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx,
                                             KVM_REG_RISCV_TIMER_REG(name), \
                                             KVM_REG_SIZE_U64)
 
+/* Register id for index 'idx' of type KVM_REG_RISCV_ISA_EXT, sized KVM_REG_SIZE_ULONG. */
+#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \
+                                            idx, KVM_REG_SIZE_ULONG)
+
 /* L3 index Bit[47:39] */
 #define PGTBL_L3_INDEX_MASK                    0x0000FF8000000000ULL
 #define PGTBL_L3_INDEX_SHIFT                   39
diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h
new file mode 100644 (file)
index 0000000..be46eb3
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "processor.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_RISCV_SBI
+
+/* Intentionally empty: this implementation does nothing with 'vm' or 'mmio_gpa'. */
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+/*
+ * Issue a ucall through an SBI ecall to the selftests extension
+ * (KVM_RISCV_SELFTESTS_SBI_EXT / KVM_RISCV_SELFTESTS_SBI_UCALL), passing
+ * 'uc' as the first argument and zero for the remaining ones.
+ */
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+                 KVM_RISCV_SELFTESTS_SBI_UCALL,
+                 uc, 0, 0, 0, 0, 0);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h
new file mode 100644 (file)
index 0000000..b231bf2
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_S390_SIEIC
+
+/* Intentionally empty: this implementation does nothing with 'vm' or 'mmio_gpa'. */
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+/*
+ * Issue a ucall with a DIAGNOSE 0x501 instruction, handing 'uc' over in
+ * an address register ("a" constraint).  The "memory" clobber keeps the
+ * compiler from reordering memory accesses across the instruction.
+ */
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+}
+
+#endif
index a6e9f21..7e614ad 100644 (file)
@@ -53,14 +53,13 @@ void test_assert(bool exp, const char *exp_str,
 #define TEST_ASSERT(e, fmt, ...) \
        test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
 
-#define ASSERT_EQ(a, b) do { \
-       typeof(a) __a = (a); \
-       typeof(b) __b = (b); \
-       TEST_ASSERT(__a == __b, \
-                   "ASSERT_EQ(%s, %s) failed.\n" \
-                   "\t%s is %#lx\n" \
-                   "\t%s is %#lx", \
-                   #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+#define TEST_ASSERT_EQ(a, b)                                           \
+do {                                                                   \
+       typeof(a) __a = (a);                                            \
+       typeof(b) __b = (b);                                            \
+       test_assert(__a == __b, #a " == " #b, __FILE__, __LINE__,       \
+                   "%#lx != %#lx (%s != %s)",                          \
+                   (unsigned long)(__a), (unsigned long)(__b), #a, #b);\
 } while (0)
 
 #define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do {               \
@@ -186,4 +185,9 @@ static inline uint32_t atoi_non_negative(const char *name, const char *num_str)
        return num;
 }
 
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args);
+int guest_snprintf(char *buf, int n, const char *fmt, ...);
+
+char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1)));
+
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
index 1a6aaef..112bc1d 100644 (file)
@@ -7,21 +7,25 @@
 #ifndef SELFTEST_KVM_UCALL_COMMON_H
 #define SELFTEST_KVM_UCALL_COMMON_H
 #include "test_util.h"
+#include "ucall.h"
 
 /* Common ucalls */
 enum {
        UCALL_NONE,
        UCALL_SYNC,
        UCALL_ABORT,
+       UCALL_PRINTF,
        UCALL_DONE,
        UCALL_UNHANDLED,
 };
 
 #define UCALL_MAX_ARGS 7
+#define UCALL_BUFFER_LEN 1024
 
 struct ucall {
        uint64_t cmd;
        uint64_t args[UCALL_MAX_ARGS];
+       char buffer[UCALL_BUFFER_LEN];
 
        /* Host virtual address of this struct. */
        struct ucall *hva;
@@ -32,8 +36,12 @@ void ucall_arch_do_ucall(vm_vaddr_t uc);
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
 
 void ucall(uint64_t cmd, int nargs, ...);
+void ucall_fmt(uint64_t cmd, const char *fmt, ...);
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+                 unsigned int line, const char *fmt, ...);
 uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
 void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
+int ucall_nr_pages_required(uint64_t page_size);
 
 /*
  * Perform userspace call without any associated data.  This bare call avoids
@@ -46,8 +54,11 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
 #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
                                ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)      ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args)
 #define GUEST_DONE()           ucall(UCALL_DONE, 0)
 
+#define REPORT_GUEST_PRINTF(ucall) pr_info("%s", (ucall).buffer)
+
 enum guest_assert_builtin_args {
        GUEST_ERROR_STRING,
        GUEST_FILE,
@@ -55,70 +66,41 @@ enum guest_assert_builtin_args {
        GUEST_ASSERT_BUILTIN_NARGS
 };
 
-#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...)         \
-do {                                                                   \
-       if (!(_condition))                                              \
-               ucall(UCALL_ABORT, GUEST_ASSERT_BUILTIN_NARGS + _nargs, \
-                     "Failed guest assert: " _condstr,                 \
-                     __FILE__, __LINE__, ##_args);                     \
+#define ____GUEST_ASSERT(_condition, _exp, _fmt, _args...)                             \
+do {                                                                                   \
+       if (!(_condition))                                                              \
+               ucall_assert(UCALL_ABORT, _exp, __FILE__, __LINE__, _fmt, ##_args);     \
 } while (0)
 
-#define GUEST_ASSERT(_condition) \
-       __GUEST_ASSERT(_condition, #_condition, 0, 0)
-
-#define GUEST_ASSERT_1(_condition, arg1) \
-       __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
-
-#define GUEST_ASSERT_2(_condition, arg1, arg2) \
-       __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
-
-#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
-       __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
-
-#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
-       __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
-
-#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+#define __GUEST_ASSERT(_condition, _fmt, _args...)                             \
+       ____GUEST_ASSERT(_condition, #_condition, _fmt, ##_args)
 
-#define __REPORT_GUEST_ASSERT(_ucall, fmt, _args...)                   \
-       TEST_FAIL("%s at %s:%ld\n" fmt,                                 \
-                 (const char *)(_ucall).args[GUEST_ERROR_STRING],      \
-                 (const char *)(_ucall).args[GUEST_FILE],              \
-                 (_ucall).args[GUEST_LINE],                            \
-                 ##_args)
+#define GUEST_ASSERT(_condition)                                               \
+       __GUEST_ASSERT(_condition, #_condition)
 
-#define GUEST_ASSERT_ARG(ucall, i) ((ucall).args[GUEST_ASSERT_BUILTIN_NARGS + i])
+#define GUEST_FAIL(_fmt, _args...)                                             \
+       ucall_assert(UCALL_ABORT, "Unconditional guest failure",                \
+                    __FILE__, __LINE__, _fmt, ##_args)
 
-#define REPORT_GUEST_ASSERT(ucall)             \
-       __REPORT_GUEST_ASSERT((ucall), "")
-
-#define REPORT_GUEST_ASSERT_1(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0))
-
-#define REPORT_GUEST_ASSERT_2(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0),     \
-                             GUEST_ASSERT_ARG((ucall), 1))
-
-#define REPORT_GUEST_ASSERT_3(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0),     \
-                             GUEST_ASSERT_ARG((ucall), 1),     \
-                             GUEST_ASSERT_ARG((ucall), 2))
+#define GUEST_ASSERT_EQ(a, b)                                                  \
+do {                                                                           \
+       typeof(a) __a = (a);                                                    \
+       typeof(b) __b = (b);                                                    \
+       ____GUEST_ASSERT(__a == __b, #a " == " #b, "%#lx != %#lx (%s != %s)",   \
+                        (unsigned long)(__a), (unsigned long)(__b), #a, #b);   \
+} while (0)
 
-#define REPORT_GUEST_ASSERT_4(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0),     \
-                             GUEST_ASSERT_ARG((ucall), 1),     \
-                             GUEST_ASSERT_ARG((ucall), 2),     \
-                             GUEST_ASSERT_ARG((ucall), 3))
+#define GUEST_ASSERT_NE(a, b)                                                  \
+do {                                                                           \
+       typeof(a) __a = (a);                                                    \
+       typeof(b) __b = (b);                                                    \
+       ____GUEST_ASSERT(__a != __b, #a " != " #b, "%#lx == %#lx (%s == %s)",   \
+                        (unsigned long)(__a), (unsigned long)(__b), #a, #b);   \
+} while (0)
 
-#define REPORT_GUEST_ASSERT_N(ucall, fmt, args...)     \
-       __REPORT_GUEST_ASSERT((ucall), fmt, ##args)
+#define REPORT_GUEST_ASSERT(ucall)                                             \
+       test_assert(false, (const char *)(ucall).args[GUEST_ERROR_STRING],      \
+                   (const char *)(ucall).args[GUEST_FILE],                     \
+                   (ucall).args[GUEST_LINE], "%s", (ucall).buffer)
 
 #endif /* SELFTEST_KVM_UCALL_COMMON_H */
index aa434c8..4fd0421 100644 (file)
@@ -239,7 +239,12 @@ struct kvm_x86_cpu_property {
 #define X86_PROPERTY_MAX_BASIC_LEAF            KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
 #define X86_PROPERTY_PMU_VERSION               KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
 #define X86_PROPERTY_PMU_NR_GP_COUNTERS                KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
 #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_PMU_EVENTS_MASK           KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK        KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
+#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS     KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH      KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
 
 #define X86_PROPERTY_SUPPORTED_XCR0_LO         KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
 #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0      KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h
new file mode 100644 (file)
index 0000000..06b244b
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_IO
+
+/* Intentionally empty: this implementation does nothing with 'vm' or 'mmio_gpa'. */
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+#endif
index b3b00be..69f26d8 100644 (file)
@@ -200,7 +200,7 @@ static void *vcpu_worker(void *data)
                if (READ_ONCE(host_quit))
                        return NULL;
 
-               clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+               clock_gettime(CLOCK_MONOTONIC, &start);
                ret = _vcpu_run(vcpu);
                ts_diff = timespec_elapsed(start);
 
@@ -367,7 +367,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        /* Test the stage of KVM creating mappings */
        *current_stage = KVM_CREATE_MAPPINGS;
 
-       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+       clock_gettime(CLOCK_MONOTONIC, &start);
        vcpus_complete_new_stage(*current_stage);
        ts_diff = timespec_elapsed(start);
 
@@ -380,7 +380,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
        *current_stage = KVM_UPDATE_MAPPINGS;
 
-       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+       clock_gettime(CLOCK_MONOTONIC, &start);
        vcpus_complete_new_stage(*current_stage);
        ts_diff = timespec_elapsed(start);
 
@@ -392,7 +392,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
        *current_stage = KVM_ADJUST_MAPPINGS;
 
-       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+       clock_gettime(CLOCK_MONOTONIC, &start);
        vcpus_complete_new_stage(*current_stage);
        ts_diff = timespec_elapsed(start);
 
index f212bd8..ddab0ce 100644 (file)
@@ -6,11 +6,7 @@
  */
 #include "kvm_util.h"
 
-/*
- * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
- * VM), it must not be accessed from host code.
- */
-static vm_vaddr_t *ucall_exit_mmio_addr;
+vm_vaddr_t *ucall_exit_mmio_addr;
 
 void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
@@ -23,11 +19,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
        write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
 }
 
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
-}
-
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
new file mode 100644 (file)
index 0000000..c4a69d8
--- /dev/null
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+/*
+ * Store byte 'v' at the output cursor 'str' and advance the cursor by one.
+ * GUEST_ASSERT() runs first and checks that the cursor is strictly below
+ * 'end'.  'str' is modified by the macro, so it must be an lvalue.
+ */
+#define APPEND_BUFFER_SAFE(str, end, v) \
+do {                                   \
+       GUEST_ASSERT(str < end);        \
+       *str++ = (v);                   \
+} while (0)
+
+/* Return 1 when 'ch' is one of the characters '0'..'9', 0 otherwise. */
+static int isdigit(int ch)
+{
+       if (ch < '0')
+               return 0;
+       return ch <= '9';
+}
+
+/*
+ * Parse the run of decimal digits at '*s' and return its value.  On return
+ * '*s' points at the first character that is not a digit; if there are no
+ * digits, 0 is returned and '*s' is left where it was.
+ */
+static int skip_atoi(const char **s)
+{
+       const char *cur = *s;
+       int val = 0;
+
+       for (; isdigit(*cur); cur++)
+               val = val * 10 + *cur - '0';
+
+       *s = cur;
+       return val;
+}
+
+#define ZEROPAD        1               /* pad with zero */
+#define SIGN   2               /* unsigned/signed long */
+#define PLUS   4               /* show plus */
+#define SPACE  8               /* space if plus */
+#define LEFT   16              /* left justified */
+#define SMALL  32              /* Must be 32 == 0x20 */
+#define SPECIAL        64              /* 0x */
+
+/*
+ * Divide 'n' in place by 'base' and evaluate to the remainder.  'n' is
+ * treated as an unsigned 64-bit value and 'base' as an unsigned 32-bit value.
+ * 'n' must be a modifiable lvalue.  Both arguments are expanded more than
+ * once, so they must be free of side effects; they are parenthesized so that
+ * the casts apply to the whole argument expression.
+ */
+#define __do_div(n, base)                              \
+({                                                     \
+       int __res;                                      \
+                                                       \
+       __res = ((uint64_t)(n)) % (uint32_t)(base);     \
+       (n) = ((uint64_t)(n)) / (uint32_t)(base);       \
+       __res;                                          \
+})
+
+/*
+ * Format 'num' in the given 'base' at the output cursor 'str'.  Every byte is
+ * stored through APPEND_BUFFER_SAFE(), which asserts that the cursor is still
+ * below 'end'.
+ *
+ * @size:      minimum field width; the output is padded up to this width
+ * @precision: minimum number of digits; shorter values get leading zeros
+ * @type:      bitmask of ZEROPAD, SIGN, PLUS, SPACE, LEFT, SMALL and SPECIAL
+ *
+ * Returns the advanced output cursor, or NULL if 'base' is outside 2..16.
+ */
+static char *number(char *str, const char *end, long num, int base, int size,
+                   int precision, int type)
+{
+       /* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
+       static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+       char tmp[66];
+       char c, sign, locase;
+       int i;
+
+       /*
+        * locase = 0 or 0x20. ORing digits or letters with 'locase'
+        * produces same digits or (maybe lowercased) letters
+        */
+       locase = (type & SMALL);
+       /* Left justification wins over zero padding. */
+       if (type & LEFT)
+               type &= ~ZEROPAD;
+       if (base < 2 || base > 16)
+               return NULL;
+       c = (type & ZEROPAD) ? '0' : ' ';
+       sign = 0;
+       if (type & SIGN) {
+               if (num < 0) {
+                       sign = '-';
+                       /*
+                        * Negate in unsigned arithmetic: "-num" on a signed
+                        * long overflows (undefined behavior) for LONG_MIN.
+                        * The digit loop below reads 'num' as uint64_t, so
+                        * the magnitude is preserved.
+                        */
+                       num = -(uint64_t)num;
+                       size--;
+               } else if (type & PLUS) {
+                       sign = '+';
+                       size--;
+               } else if (type & SPACE) {
+                       sign = ' ';
+                       size--;
+               }
+       }
+       /* Reserve room for the "0x" or "0" prefix. */
+       if (type & SPECIAL) {
+               if (base == 16)
+                       size -= 2;
+               else if (base == 8)
+                       size--;
+       }
+       /* Generate the digits least-significant first into tmp[]. */
+       i = 0;
+       if (num == 0)
+               tmp[i++] = '0';
+       else
+               while (num != 0)
+                       tmp[i++] = (digits[__do_div(num, base)] | locase);
+       if (i > precision)
+               precision = i;
+       size -= precision;
+       /* Right-justified, space-padded: the padding goes before the sign. */
+       if (!(type & (ZEROPAD + LEFT)))
+               while (size-- > 0)
+                       APPEND_BUFFER_SAFE(str, end, ' ');
+       if (sign)
+               APPEND_BUFFER_SAFE(str, end, sign);
+       if (type & SPECIAL) {
+               if (base == 8)
+                       APPEND_BUFFER_SAFE(str, end, '0');
+               else if (base == 16) {
+                       APPEND_BUFFER_SAFE(str, end, '0');
+                       APPEND_BUFFER_SAFE(str, end, 'x');
+               }
+       }
+       /* Zero padding (if any width is left) goes after sign and prefix. */
+       if (!(type & LEFT))
+               while (size-- > 0)
+                       APPEND_BUFFER_SAFE(str, end, c);
+       /* Leading zeros required by the precision. */
+       while (i < precision--)
+               APPEND_BUFFER_SAFE(str, end, '0');
+       /* The digits themselves, most-significant first. */
+       while (i-- > 0)
+               APPEND_BUFFER_SAFE(str, end, tmp[i]);
+       /* Trailing padding for left-justified output. */
+       while (size-- > 0)
+               APPEND_BUFFER_SAFE(str, end, ' ');
+
+       return str;
+}
+
+/*
+ * Minimal vsnprintf() for guest code: format 'fmt' with 'args' into 'buf',
+ * which holds 'n' bytes.
+ *
+ * Handled conversions are %c, %s, %p, %n, %%, %o, %x, %X, %d, %i and %u, with
+ * the '-', '+', ' ', '#' and '0' flags, an optional field width and precision
+ * (either may be '*'), and the 'h', 'l' and 'll' qualifiers ('L' is parsed
+ * but does not select a wider argument type).  Any other conversion character
+ * is emitted as '%' followed by that character.
+ *
+ * Running out of room is not a truncation: every byte is stored through
+ * APPEND_BUFFER_SAFE(), which asserts that the cursor is still inside the
+ * buffer, and room for the terminating NUL is asserted as well.
+ *
+ * Returns the number of bytes written, not counting the terminating NUL.
+ */
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args)
+{
+       char *str, *end;
+       const char *s;
+       uint64_t num;
+       int i, base;
+       int len;
+
+       int flags;              /* flags to number() */
+
+       int field_width;        /* width of output field */
+       int precision;          /*
+                                * min. # of digits for integers; max
+                                * number of chars taken from a string
+                                */
+       int qualifier;          /* 'h', 'l', or 'L' for integer fields */
+
+       end = buf + n;
+       GUEST_ASSERT(buf < end);
+       GUEST_ASSERT(n > 0);
+
+       for (str = buf; *fmt; ++fmt) {
+               if (*fmt != '%') {
+                       APPEND_BUFFER_SAFE(str, end, *fmt);
+                       continue;
+               }
+
+               /* process flags */
+               flags = 0;
+repeat:
+               ++fmt;          /* this also skips first '%' */
+               switch (*fmt) {
+               case '-':
+                       flags |= LEFT;
+                       goto repeat;
+               case '+':
+                       flags |= PLUS;
+                       goto repeat;
+               case ' ':
+                       flags |= SPACE;
+                       goto repeat;
+               case '#':
+                       flags |= SPECIAL;
+                       goto repeat;
+               case '0':
+                       flags |= ZEROPAD;
+                       goto repeat;
+               }
+
+               /* get field width */
+               field_width = -1;
+               if (isdigit(*fmt))
+                       field_width = skip_atoi(&fmt);
+               else if (*fmt == '*') {
+                       ++fmt;
+                       /* it's the next argument */
+                       field_width = va_arg(args, int);
+                       if (field_width < 0) {
+                               field_width = -field_width;
+                               flags |= LEFT;
+                       }
+               }
+
+               /* get the precision */
+               precision = -1;
+               if (*fmt == '.') {
+                       ++fmt;
+                       if (isdigit(*fmt))
+                               precision = skip_atoi(&fmt);
+                       else if (*fmt == '*') {
+                               ++fmt;
+                               /* it's the next argument */
+                               precision = va_arg(args, int);
+                       }
+                       if (precision < 0)
+                               precision = 0;
+               }
+
+               /* get the conversion qualifier */
+               qualifier = -1;
+               if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+                       qualifier = *fmt;
+                       ++fmt;
+               }
+
+               /*
+                * Accept "ll" (%llu, %llx, %lld, ...) as a synonym for "l".
+                * Arguments with the 'l' qualifier are fetched as uint64_t
+                * below, so no wider read is needed.  Without this the second
+                * 'l' would land in the default case and be printed as text,
+                * and the argument would never be consumed.
+                */
+               if (qualifier == 'l' && *fmt == 'l')
+                       ++fmt;
+
+               /* default base */
+               base = 10;
+
+               switch (*fmt) {
+               case 'c':
+                       if (!(flags & LEFT))
+                               while (--field_width > 0)
+                                       APPEND_BUFFER_SAFE(str, end, ' ');
+                       APPEND_BUFFER_SAFE(str, end,
+                                           (uint8_t)va_arg(args, int));
+                       while (--field_width > 0)
+                               APPEND_BUFFER_SAFE(str, end, ' ');
+                       continue;
+
+               case 's':
+                       s = va_arg(args, char *);
+                       /*
+                        * With no precision given, -1 converts to the largest
+                        * count, so the whole string is measured.
+                        */
+                       len = strnlen(s, precision);
+
+                       if (!(flags & LEFT))
+                               while (len < field_width--)
+                                       APPEND_BUFFER_SAFE(str, end, ' ');
+                       for (i = 0; i < len; ++i)
+                               APPEND_BUFFER_SAFE(str, end, *s++);
+                       while (len < field_width--)
+                               APPEND_BUFFER_SAFE(str, end, ' ');
+                       continue;
+
+               case 'p':
+                       /* Default: zero-padded lowercase hex with "0x". */
+                       if (field_width == -1) {
+                               field_width = 2 * sizeof(void *);
+                               flags |= SPECIAL | SMALL | ZEROPAD;
+                       }
+                       str = number(str, end,
+                                    (uint64_t)va_arg(args, void *), 16,
+                                    field_width, precision, flags);
+                       continue;
+
+               case 'n':
+                       /* Store the number of bytes written so far. */
+                       if (qualifier == 'l') {
+                               long *ip = va_arg(args, long *);
+                               *ip = (str - buf);
+                       } else {
+                               int *ip = va_arg(args, int *);
+                               *ip = (str - buf);
+                       }
+                       continue;
+
+               case '%':
+                       APPEND_BUFFER_SAFE(str, end, '%');
+                       continue;
+
+               /* integer number formats - set up the flags and "break" */
+               case 'o':
+                       base = 8;
+                       break;
+
+               case 'x':
+                       flags |= SMALL;
+                       /* fallthrough */
+               case 'X':
+                       base = 16;
+                       break;
+
+               case 'd':
+               case 'i':
+                       flags |= SIGN;
+                       /* fallthrough */
+               case 'u':
+                       break;
+
+               default:
+                       APPEND_BUFFER_SAFE(str, end, '%');
+                       if (*fmt)
+                               APPEND_BUFFER_SAFE(str, end, *fmt);
+                       else
+                               --fmt;  /* let the loop see the NUL */
+                       continue;
+               }
+               /* Fetch the integer argument at the width the qualifier asks for. */
+               if (qualifier == 'l')
+                       num = va_arg(args, uint64_t);
+               else if (qualifier == 'h') {
+                       num = (uint16_t)va_arg(args, int);
+                       if (flags & SIGN)
+                               num = (int16_t)num;
+               } else if (flags & SIGN)
+                       num = va_arg(args, int);
+               else
+                       num = va_arg(args, uint32_t);
+               str = number(str, end, num, base, field_width, precision, flags);
+       }
+
+       GUEST_ASSERT(str < end);
+       *str = '\0';
+       return str - buf;
+}
+
+/*
+ * Variadic front end for guest_vsnprintf(): format 'fmt' and its arguments
+ * into the 'n'-byte buffer 'buf' and return guest_vsnprintf()'s result.
+ */
+int guest_snprintf(char *buf, int n, const char *fmt, ...)
+{
+       va_list ap;
+       int written;
+
+       va_start(ap, fmt);
+       written = guest_vsnprintf(buf, n, fmt, ap);
+       va_end(ap);
+
+       return written;
+}
index 9741a7f..7a8af18 100644 (file)
@@ -312,6 +312,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
                                     uint32_t nr_runnable_vcpus,
                                     uint64_t extra_mem_pages)
 {
+       uint64_t page_size = vm_guest_mode_params[mode].page_size;
        uint64_t nr_pages;
 
        TEST_ASSERT(nr_runnable_vcpus,
@@ -340,6 +341,9 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
         */
        nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
 
+       /* Account for the number of pages needed by ucall. */
+       nr_pages += ucall_nr_pages_required(page_size);
+
        return vm_adjust_num_guest_pages(mode, nr_pages);
 }
 
@@ -994,7 +998,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
        if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
                alignment = max(backing_src_pagesz, alignment);
 
-       ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+       TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
 
        /* Add enough memory to align up if necessary */
        if (alignment > 1)
index 9a3476a..fe6d100 100644 (file)
 #include "kvm_util.h"
 #include "processor.h"
 
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
                        unsigned long arg1, unsigned long arg2,
                        unsigned long arg3, unsigned long arg4,
@@ -40,13 +36,6 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
        return ret;
 }
 
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
-                 KVM_RISCV_SELFTESTS_SBI_UCALL,
-                 uc, 0, 0, 0, 0, 0);
-}
-
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
index a7f02dc..cca9873 100644 (file)
@@ -6,16 +6,6 @@
  */
 #include "kvm_util.h"
 
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
-       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
-}
-
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
index 50e0cf4..88cb6b8 100644 (file)
@@ -634,7 +634,6 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
                                tmp = node_prev(s, nodep);
 
                        node_rm(s, nodep);
-                       nodep = NULL;
 
                        nodep = tmp;
                        reduction_performed = true;
index 632398a..5d1c872 100644 (file)
@@ -37,3 +37,12 @@ void *memset(void *s, int c, size_t count)
                *xs++ = c;
        return s;
 }
+
+/*
+ * Return the length of the string 's', looking at no more than 'count'
+ * bytes: the result is 'count' if no NUL byte is found within that range.
+ */
+size_t strnlen(const char *s, size_t count)
+{
+       size_t len = 0;
+
+       while (len < count && s[len] != '\0')
+               len++;
+
+       return len;
+}
index b772193..3e36019 100644 (file)
@@ -5,6 +5,9 @@
  * Copyright (C) 2020, Google LLC.
  */
 
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
 #include <assert.h>
 #include <ctype.h>
 #include <limits.h>
@@ -377,3 +380,15 @@ int atoi_paranoid(const char *num_str)
 
        return num;
 }
+
+/*
+ * Format printf()-style into a newly allocated string.
+ *
+ * The buffer is allocated by vasprintf(); the caller owns it and releases it
+ * with free().  Returns NULL if vasprintf() fails, since the buffer pointer
+ * is not guaranteed to hold a defined value in that case.
+ */
+char *strdup_printf(const char *fmt, ...)
+{
+       va_list ap;
+       char *str;
+       int ret;
+
+       va_start(ap, fmt);
+       ret = vasprintf(&str, fmt, ap);
+       va_end(ap);
+
+       return ret < 0 ? NULL : str;
+}
index 2f0e2ea..816a3fa 100644 (file)
@@ -11,6 +11,11 @@ struct ucall_header {
        struct ucall ucalls[KVM_MAX_VCPUS];
 };
 
+/* Number of 'page_size'-byte pages needed to hold one struct ucall_header. */
+int ucall_nr_pages_required(uint64_t page_size)
+{
+       uint64_t nr_bytes = align_up(sizeof(struct ucall_header), page_size);
+
+       return nr_bytes / page_size;
+}
+
 /*
  * ucall_pool holds per-VM values (global data is duplicated by each VM), it
  * must not be accessed from host code.
@@ -70,6 +75,45 @@ static void ucall_free(struct ucall *uc)
        clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
 }
 
+/*
+ * Issue a ucall carrying assertion details.  The expression text, file name
+ * and line number are stored in the ucall's argument slots, the formatted
+ * message is written to its buffer, and the ucall is then handed to
+ * ucall_arch_do_ucall().  The ucall is freed once that call returns.
+ */
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+                 unsigned int line, const char *fmt, ...)
+{
+       struct ucall *slot = ucall_alloc();
+       va_list ap;
+
+       slot->cmd = cmd;
+
+       WRITE_ONCE(slot->args[GUEST_ERROR_STRING], (uint64_t)(exp));
+       WRITE_ONCE(slot->args[GUEST_FILE], (uint64_t)(file));
+       WRITE_ONCE(slot->args[GUEST_LINE], line);
+
+       va_start(ap, fmt);
+       guest_vsnprintf(slot->buffer, UCALL_BUFFER_LEN, fmt, ap);
+       va_end(ap);
+
+       ucall_arch_do_ucall((vm_vaddr_t)slot->hva);
+
+       ucall_free(slot);
+}
+
+/*
+ * Issue a ucall whose payload is a formatted message: the message is written
+ * to the ucall's buffer and the ucall is handed to ucall_arch_do_ucall().
+ * The ucall is freed once that call returns.
+ */
+void ucall_fmt(uint64_t cmd, const char *fmt, ...)
+{
+       struct ucall *slot = ucall_alloc();
+       va_list ap;
+
+       slot->cmd = cmd;
+
+       va_start(ap, fmt);
+       guest_vsnprintf(slot->buffer, UCALL_BUFFER_LEN, fmt, ap);
+       va_end(ap);
+
+       ucall_arch_do_ucall((vm_vaddr_t)slot->hva);
+
+       ucall_free(slot);
+}
+
 void ucall(uint64_t cmd, int nargs, ...)
 {
        struct ucall *uc;
index d4a0b50..d828837 100644 (file)
@@ -1074,11 +1074,6 @@ static bool kvm_fixup_exception(struct ex_regs *regs)
        return true;
 }
 
-void kvm_exit_unexpected_vector(uint32_t value)
-{
-       ucall(UCALL_UNHANDLED, 1, value);
-}
-
 void route_exception(struct ex_regs *regs)
 {
        typedef void(*handler)(struct ex_regs *);
@@ -1092,7 +1087,10 @@ void route_exception(struct ex_regs *regs)
        if (kvm_fixup_exception(regs))
                return;
 
-       kvm_exit_unexpected_vector(regs->vector);
+       ucall_assert(UCALL_UNHANDLED,
+                    "Unhandled exception in guest", __FILE__, __LINE__,
+                    "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+                    regs->vector, regs->rip);
 }
 
 void vm_init_descriptor_tables(struct kvm_vm *vm)
@@ -1135,12 +1133,8 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
 {
        struct ucall uc;
 
-       if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
-               uint64_t vector = uc.args[0];
-
-               TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
-                         vector);
-       }
+       if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
+               REPORT_GUEST_ASSERT(uc);
 }
 
 const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
index 4d41dc6..1265cec 100644 (file)
@@ -8,14 +8,38 @@
 
 #define UCALL_PIO_PORT ((uint16_t)0x1000)
 
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
 void ucall_arch_do_ucall(vm_vaddr_t uc)
 {
-       asm volatile("in %[port], %%al"
-               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory");
+       /*
+        * FIXME: Revert this hack (the entire commit that added it) once nVMX
+        * preserves L2 GPRs across a nested VM-Exit.  If a ucall from L2, e.g.
+        * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+        * clobbered by L1.  Save and restore non-volatile GPRs (clobbering RBP
+        * in particular is problematic) along with RDX and RDI (which are
+        * inputs), and clobber volatile GPRs. *sigh*
+        */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+       "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+       asm volatile("push %%rbp\n\t"
+                    "push %%r15\n\t"
+                    "push %%r14\n\t"
+                    "push %%r13\n\t"
+                    "push %%r12\n\t"
+                    "push %%rbx\n\t"
+                    "push %%rdx\n\t"
+                    "push %%rdi\n\t"
+                    "in %[port], %%al\n\t"
+                    "pop %%rdi\n\t"
+                    "pop %%rdx\n\t"
+                    "pop %%rbx\n\t"
+                    "pop %%r12\n\t"
+                    "pop %%r13\n\t"
+                    "pop %%r14\n\t"
+                    "pop %%r15\n\t"
+                    "pop %%rbp\n\t"
+               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+                    HORRIFIC_L2_UCALL_CLOBBER_HACK);
 }
 
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
index feaf2be..6628dc4 100644 (file)
@@ -55,7 +55,7 @@ static void rendezvous_with_boss(void)
 static void run_vcpu(struct kvm_vcpu *vcpu)
 {
        vcpu_run(vcpu);
-       ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
 }
 
 static void *vcpu_worker(void *data)
index 4210cd2..20eb2e7 100644 (file)
@@ -157,7 +157,7 @@ static void *vcpu_worker(void *__data)
                                goto done;
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_1(uc, "val = %lu");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
@@ -560,7 +560,7 @@ static void guest_code_test_memslot_rw(void)
                     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
                        uint64_t val = *(uint64_t *)ptr;
 
-                       GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
+                       GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
                        *(uint64_t *)ptr = 0;
                }
 
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
new file mode 100644 (file)
index 0000000..d8ecacd
--- /dev/null
@@ -0,0 +1,872 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (c) 2023 Intel Corporation
+ *
+ */
+#include <stdio.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK)
+
+/*
+ * Return true for registers that should be filtered out.
+ *
+ * Some ISA extensions are optional and not present on every host, yet they
+ * cannot be disabled through the ISA_EXT registers when they are present.
+ * To keep things simple, registers of this kind are filtered out.
+ */
+bool filter_reg(__u64 reg)
+{
+       switch (reg & ~REG_MASK) {
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI:
+       case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* Return true only when 'err' is EINVAL. */
+bool check_reject_set(int err)
+{
+       if (err != EINVAL)
+               return false;
+
+       return true;
+}
+
+/*
+ * Read the ISA-extension register for 'ext' from 'vcpu' and report whether
+ * its value is non-zero.  If the read fails, a diagnostic is printed and
+ * false is returned.
+ */
+static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext)
+{
+       int ret;
+       unsigned long value;
+
+       ret = __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(ext), &value);
+       if (ret) {
+               /* Terminate the line so it doesn't run into later output. */
+               printf("Failed to get ext %d\n", ext);
+               return false;
+       }
+
+       return !!value;
+}
+
+/*
+ * Prepare 'vcpu' for the register-list config 'c': first write 0 to every
+ * ISA-extension register, then, for each sublist with a non-zero 'feature',
+ * write 1 to that extension's register and require that it reads back as
+ * enabled.
+ */
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *s;
+
+       /*
+        * Disable all extensions which were enabled by default
+        * if they were available in the risc-v host.
+        */
+       /* The return value is not checked, so a rejected write is ignored. */
+       for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++)
+               __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0);
+
+       for_each_sublist(c, s) {
+               /* Sublists without a feature need no extension enabled. */
+               if (!s->feature)
+                       continue;
+
+               /* Try to enable the desired extension */
+               __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(s->feature), 1);
+
+               /* Double check whether the desired extension was enabled */
+               __TEST_REQUIRE(vcpu_has_ext(vcpu, s->feature),
+                              "%s not available, skipping tests\n", s->name);
+       }
+}
+
+/*
+ * Return the source-level macro expression naming config register 'id'.
+ * Known offsets yield string literals; any other offset yields a string
+ * built by strdup_printf() that is marked "UNKNOWN".
+ */
+static const char *config_id_to_str(__u64 id)
+{
+       /* reg_off is the offset into struct kvm_riscv_config */
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG);
+
+       switch (reg_off) {
+       case KVM_REG_RISCV_CONFIG_REG(isa):
+               return "KVM_REG_RISCV_CONFIG_REG(isa)";
+       case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+               return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)";
+       case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+               return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)";
+       case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+               return "KVM_REG_RISCV_CONFIG_REG(mvendorid)";
+       case KVM_REG_RISCV_CONFIG_REG(marchid):
+               return "KVM_REG_RISCV_CONFIG_REG(marchid)";
+       case KVM_REG_RISCV_CONFIG_REG(mimpid):
+               return "KVM_REG_RISCV_CONFIG_REG(mimpid)";
+       case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+               return "KVM_REG_RISCV_CONFIG_REG(satp_mode)";
+       }
+
+       /*
+        * Config regs would grow regularly with new pseudo reg added, so
+        * just show raw id to indicate a new pseudo config reg.
+        */
+       return strdup_printf("KVM_REG_RISCV_CONFIG_REG(%lld) /* UNKNOWN */", reg_off);
+}
+
+/*
+ * Return the source-level macro expression naming core register 'id'.
+ * Registers that belong to a numbered group (t, s, a) are named via
+ * strdup_printf() with the index computed from the offset within the group.
+ * An unrecognized offset is reported through TEST_FAIL(), using 'prefix' in
+ * the message.
+ */
+static const char *core_id_to_str(const char *prefix, __u64 id)
+{
+       /* reg_off is the offset into struct kvm_riscv_core */
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE);
+
+       switch (reg_off) {
+       case KVM_REG_RISCV_CORE_REG(regs.pc):
+               return "KVM_REG_RISCV_CORE_REG(regs.pc)";
+       case KVM_REG_RISCV_CORE_REG(regs.ra):
+               return "KVM_REG_RISCV_CORE_REG(regs.ra)";
+       case KVM_REG_RISCV_CORE_REG(regs.sp):
+               return "KVM_REG_RISCV_CORE_REG(regs.sp)";
+       case KVM_REG_RISCV_CORE_REG(regs.gp):
+               return "KVM_REG_RISCV_CORE_REG(regs.gp)";
+       case KVM_REG_RISCV_CORE_REG(regs.tp):
+               return "KVM_REG_RISCV_CORE_REG(regs.tp)";
+       case KVM_REG_RISCV_CORE_REG(regs.t0) ... KVM_REG_RISCV_CORE_REG(regs.t2):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.t0));
+       case KVM_REG_RISCV_CORE_REG(regs.s0) ... KVM_REG_RISCV_CORE_REG(regs.s1):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.s0));
+       case KVM_REG_RISCV_CORE_REG(regs.a0) ... KVM_REG_RISCV_CORE_REG(regs.a7):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.a%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.a0));
+       /* The s and t groups are split in two; rebase the second halves. */
+       case KVM_REG_RISCV_CORE_REG(regs.s2) ... KVM_REG_RISCV_CORE_REG(regs.s11):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.s2) + 2);
+       case KVM_REG_RISCV_CORE_REG(regs.t3) ... KVM_REG_RISCV_CORE_REG(regs.t6):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.t3) + 3);
+       case KVM_REG_RISCV_CORE_REG(mode):
+               return "KVM_REG_RISCV_CORE_REG(mode)";
+       }
+
+       TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+#define RISCV_CSR_GENERAL(csr) \
+       "KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")"
+#define RISCV_CSR_AIA(csr) \
+       "KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")"
+
+/*
+ * Return the name string (built by RISCV_CSR_GENERAL()) for the general CSR
+ * at offset 'reg_off'.  An unrecognized offset is reported through
+ * TEST_FAIL().
+ */
+static const char *general_csr_id_to_str(__u64 reg_off)
+{
+       /* reg_off is the offset into struct kvm_riscv_csr */
+       switch (reg_off) {
+       case KVM_REG_RISCV_CSR_REG(sstatus):
+               return RISCV_CSR_GENERAL(sstatus);
+       case KVM_REG_RISCV_CSR_REG(sie):
+               return RISCV_CSR_GENERAL(sie);
+       case KVM_REG_RISCV_CSR_REG(stvec):
+               return RISCV_CSR_GENERAL(stvec);
+       case KVM_REG_RISCV_CSR_REG(sscratch):
+               return RISCV_CSR_GENERAL(sscratch);
+       case KVM_REG_RISCV_CSR_REG(sepc):
+               return RISCV_CSR_GENERAL(sepc);
+       case KVM_REG_RISCV_CSR_REG(scause):
+               return RISCV_CSR_GENERAL(scause);
+       case KVM_REG_RISCV_CSR_REG(stval):
+               return RISCV_CSR_GENERAL(stval);
+       case KVM_REG_RISCV_CSR_REG(sip):
+               return RISCV_CSR_GENERAL(sip);
+       case KVM_REG_RISCV_CSR_REG(satp):
+               return RISCV_CSR_GENERAL(satp);
+       case KVM_REG_RISCV_CSR_REG(scounteren):
+               return RISCV_CSR_GENERAL(scounteren);
+       }
+
+       TEST_FAIL("Unknown general csr reg: 0x%llx", reg_off);
+       return NULL;
+}
+
+/*
+ * Return the name string (built by RISCV_CSR_AIA()) for the AIA CSR at
+ * offset 'reg_off'.  An unrecognized offset is reported through TEST_FAIL().
+ */
+static const char *aia_csr_id_to_str(__u64 reg_off)
+{
+       /* reg_off is the offset into struct kvm_riscv_aia_csr */
+       switch (reg_off) {
+       case KVM_REG_RISCV_CSR_AIA_REG(siselect):
+               return RISCV_CSR_AIA(siselect);
+       case KVM_REG_RISCV_CSR_AIA_REG(iprio1):
+               return RISCV_CSR_AIA(iprio1);
+       case KVM_REG_RISCV_CSR_AIA_REG(iprio2):
+               return RISCV_CSR_AIA(iprio2);
+       case KVM_REG_RISCV_CSR_AIA_REG(sieh):
+               return RISCV_CSR_AIA(sieh);
+       case KVM_REG_RISCV_CSR_AIA_REG(siph):
+               return RISCV_CSR_AIA(siph);
+       case KVM_REG_RISCV_CSR_AIA_REG(iprio1h):
+               return RISCV_CSR_AIA(iprio1h);
+       case KVM_REG_RISCV_CSR_AIA_REG(iprio2h):
+               return RISCV_CSR_AIA(iprio2h);
+       }
+
+       TEST_FAIL("Unknown aia csr reg: 0x%llx", reg_off);
+       return NULL;
+}
+
+/*
+ * Split CSR register 'id' into its subtype and its offset within that
+ * subtype, then let the matching helper name the register.  An unrecognized
+ * subtype is reported through TEST_FAIL(), using 'prefix' in the message.
+ */
+static const char *csr_id_to_str(const char *prefix, __u64 id)
+{
+       __u64 csr_bits = id & ~(REG_MASK | KVM_REG_RISCV_CSR);
+       __u64 reg_subtype = csr_bits & KVM_REG_RISCV_SUBTYPE_MASK;
+       __u64 reg_off = csr_bits & ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       if (reg_subtype == KVM_REG_RISCV_CSR_GENERAL)
+               return general_csr_id_to_str(reg_off);
+       if (reg_subtype == KVM_REG_RISCV_CSR_AIA)
+               return aia_csr_id_to_str(reg_off);
+
+       TEST_FAIL("%s: Unknown csr subtype: 0x%llx", prefix, reg_subtype);
+       return NULL;
+}
+
+/*
+ * Return the source-level macro expression naming timer register 'id'.  An
+ * unrecognized offset is reported through TEST_FAIL(), using 'prefix' in the
+ * message.
+ */
+static const char *timer_id_to_str(const char *prefix, __u64 id)
+{
+       /* reg_off is the offset into struct kvm_riscv_timer */
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER);
+
+       switch (reg_off) {
+       case KVM_REG_RISCV_TIMER_REG(frequency):
+               return "KVM_REG_RISCV_TIMER_REG(frequency)";
+       case KVM_REG_RISCV_TIMER_REG(time):
+               return "KVM_REG_RISCV_TIMER_REG(time)";
+       case KVM_REG_RISCV_TIMER_REG(compare):
+               return "KVM_REG_RISCV_TIMER_REG(compare)";
+       case KVM_REG_RISCV_TIMER_REG(state):
+               return "KVM_REG_RISCV_TIMER_REG(state)";
+       }
+
+       TEST_FAIL("%s: Unknown timer reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+/*
+ * Return the source-level macro expression naming FP_F register 'id'.  For
+ * the f[0]..f[31] range the offset itself is printed as the array index.  An
+ * unrecognized offset is reported through TEST_FAIL(), using 'prefix' in the
+ * message.
+ */
+static const char *fp_f_id_to_str(const char *prefix, __u64 id)
+{
+       /* reg_off is the offset into struct __riscv_f_ext_state */
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F);
+
+       switch (reg_off) {
+       case KVM_REG_RISCV_FP_F_REG(f[0]) ...
+            KVM_REG_RISCV_FP_F_REG(f[31]):
+               /* NOTE(review): assumes the f[0] offset is 0 - confirm. */
+               return strdup_printf("KVM_REG_RISCV_FP_F_REG(f[%lld])", reg_off);
+       case KVM_REG_RISCV_FP_F_REG(fcsr):
+               return "KVM_REG_RISCV_FP_F_REG(fcsr)";
+       }
+
+       TEST_FAIL("%s: Unknown fp_f reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+/*
+ * Return the source-level macro expression naming FP_D register 'id'.  For
+ * the f[0]..f[31] range the offset itself is printed as the array index.  An
+ * unrecognized offset is reported through TEST_FAIL(), using 'prefix' in the
+ * message.
+ */
+static const char *fp_d_id_to_str(const char *prefix, __u64 id)
+{
+       /* reg_off is the offset into struct __riscv_d_ext_state */
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D);
+
+       switch (reg_off) {
+       case KVM_REG_RISCV_FP_D_REG(f[0]) ...
+            KVM_REG_RISCV_FP_D_REG(f[31]):
+               /* NOTE(review): assumes the f[0] offset is 0 - confirm. */
+               return strdup_printf("KVM_REG_RISCV_FP_D_REG(f[%lld])", reg_off);
+       case KVM_REG_RISCV_FP_D_REG(fcsr):
+               return "KVM_REG_RISCV_FP_D_REG(fcsr)";
+       }
+
+       TEST_FAIL("%s: Unknown fp_d reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+static const char *isa_ext_id_to_str(__u64 id)
+{
+       /* reg_off is the offset into unsigned long kvm_isa_ext_arr[] */
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT);
+
+       static const char * const kvm_isa_ext_reg_name[] = {
+               "KVM_RISCV_ISA_EXT_A",
+               "KVM_RISCV_ISA_EXT_C",
+               "KVM_RISCV_ISA_EXT_D",
+               "KVM_RISCV_ISA_EXT_F",
+               "KVM_RISCV_ISA_EXT_H",
+               "KVM_RISCV_ISA_EXT_I",
+               "KVM_RISCV_ISA_EXT_M",
+               "KVM_RISCV_ISA_EXT_SVPBMT",
+               "KVM_RISCV_ISA_EXT_SSTC",
+               "KVM_RISCV_ISA_EXT_SVINVAL",
+               "KVM_RISCV_ISA_EXT_ZIHINTPAUSE",
+               "KVM_RISCV_ISA_EXT_ZICBOM",
+               "KVM_RISCV_ISA_EXT_ZICBOZ",
+               "KVM_RISCV_ISA_EXT_ZBB",
+               "KVM_RISCV_ISA_EXT_SSAIA",
+               "KVM_RISCV_ISA_EXT_V",
+               "KVM_RISCV_ISA_EXT_SVNAPOT",
+               "KVM_RISCV_ISA_EXT_ZBA",
+               "KVM_RISCV_ISA_EXT_ZBS",
+               "KVM_RISCV_ISA_EXT_ZICNTR",
+               "KVM_RISCV_ISA_EXT_ZICSR",
+               "KVM_RISCV_ISA_EXT_ZIFENCEI",
+               "KVM_RISCV_ISA_EXT_ZIHPM",
+       };
+
+       if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) {
+               /*
+                * New isa extensions are added regularly, so print the raw reg_off
+                * marked as UNKNOWN for any extension not yet named in the table.
+                */
+               return strdup_printf("%lld /* UNKNOWN */", reg_off);
+       }
+
+       return kvm_isa_ext_reg_name[reg_off];
+}
+
+static const char *sbi_ext_single_id_to_str(__u64 reg_off)
+{
+       /* reg_off is KVM_RISCV_SBI_EXT_ID */
+       static const char * const kvm_sbi_ext_reg_name[] = {
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR",
+       };
+
+       if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) {
+               /*
+                * New sbi extensions are added regularly, so print the raw reg_off
+                * marked as UNKNOWN for any extension not yet named in the table.
+                */
+               return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off);
+       }
+
+       return kvm_sbi_ext_reg_name[reg_off];
+}
+
+static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off)
+{
+       if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST) {
+               /*
+                * New sbi extensions are added regularly, so print the raw reg_off
+                * marked as UNKNOWN when it lies beyond the last known multi reg.
+                */
+               return strdup_printf("%lld /* UNKNOWN */", reg_off);
+       }
+
+       switch (reg_subtype) {
+       case KVM_REG_RISCV_SBI_MULTI_EN:
+               return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld", reg_off);
+       case KVM_REG_RISCV_SBI_MULTI_DIS:
+               return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld", reg_off);
+       }
+
+       return NULL;
+}
+
+static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
+{
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT);
+       __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+       reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       switch (reg_subtype) {
+       case KVM_REG_RISCV_SBI_SINGLE:
+               return sbi_ext_single_id_to_str(reg_off);
+       case KVM_REG_RISCV_SBI_MULTI_EN:
+       case KVM_REG_RISCV_SBI_MULTI_DIS:
+               return sbi_ext_multi_id_to_str(reg_subtype, reg_off);
+       }
+
+       TEST_FAIL("%s: Unknown sbi ext subtype: 0x%llx", prefix, reg_subtype);
+       return NULL;
+}
+
+void print_reg(const char *prefix, __u64 id)
+{
+       const char *reg_size = NULL;
+
+       TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_RISCV,
+                   "%s: KVM_REG_RISCV missing in reg id: 0x%llx", prefix, id);
+
+       switch (id & KVM_REG_SIZE_MASK) {
+       case KVM_REG_SIZE_U32:
+               reg_size = "KVM_REG_SIZE_U32";
+               break;
+       case KVM_REG_SIZE_U64:
+               reg_size = "KVM_REG_SIZE_U64";
+               break;
+       case KVM_REG_SIZE_U128:
+               reg_size = "KVM_REG_SIZE_U128";
+               break;
+       default:
+               TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+                         prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+       }
+
+       switch (id & KVM_REG_RISCV_TYPE_MASK) {
+       case KVM_REG_RISCV_CONFIG:
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n",
+                               reg_size, config_id_to_str(id));
+               break;
+       case KVM_REG_RISCV_CORE:
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n",
+                               reg_size, core_id_to_str(prefix, id));
+               break;
+       case KVM_REG_RISCV_CSR:
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CSR | %s,\n",
+                               reg_size, csr_id_to_str(prefix, id));
+               break;
+       case KVM_REG_RISCV_TIMER:
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_TIMER | %s,\n",
+                               reg_size, timer_id_to_str(prefix, id));
+               break;
+       case KVM_REG_RISCV_FP_F:
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_F | %s,\n",
+                               reg_size, fp_f_id_to_str(prefix, id));
+               break;
+       case KVM_REG_RISCV_FP_D:
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
+                               reg_size, fp_d_id_to_str(prefix, id));
+               break;
+       case KVM_REG_RISCV_ISA_EXT:
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
+                               reg_size, isa_ext_id_to_str(id));
+               break;
+       case KVM_REG_RISCV_SBI_EXT:
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n",
+                               reg_size, sbi_ext_id_to_str(prefix, id));
+               break;
+       default:
+               TEST_FAIL("%s: Unexpected reg type: 0x%llx in reg id: 0x%llx", prefix,
+                               (id & KVM_REG_RISCV_TYPE_MASK) >> KVM_REG_RISCV_TYPE_SHIFT, id);
+       }
+}
+
+/*
+ * The current blessed list was primed with the output of kernel version
+ * v6.5-rc3 and then later updated with new registers.
+ */
+static __u64 base_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.gp),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.tp),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t0),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t1),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t2),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s0),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s1),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a0),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a1),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a2),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a3),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a4),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a5),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a6),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a7),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s2),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s3),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s4),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s5),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s6),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s7),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s8),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s9),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s10),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s11),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t3),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t4),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t5),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t6),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(mode),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sstatus),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sie),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stvec),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sscratch),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sepc),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scause),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stval),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_EN | 0,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_DIS | 0,
+};
+
+/*
+ * The skips_set list names the registers that are excluded from the set test.
+ *  - KVM_REG_RISCV_TIMER_REG(state): set would fail if it was not initialized properly.
+ */
+static __u64 base_skips_set[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+};
+
+static __u64 h_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H,
+};
+
+static __u64 zicbom_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM,
+};
+
+static __u64 zicboz_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ,
+};
+
+static __u64 svpbmt_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT,
+};
+
+static __u64 sstc_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC,
+};
+
+static __u64 svinval_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL,
+};
+
+static __u64 zihintpause_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+};
+
+static __u64 zba_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA,
+};
+
+static __u64 zbb_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB,
+};
+
+static __u64 zbs_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS,
+};
+
+static __u64 zicntr_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR,
+};
+
+static __u64 zicsr_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR,
+};
+
+static __u64 zifencei_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI,
+};
+
+static __u64 zihpm_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM,
+};
+
+static __u64 aia_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA,
+};
+
+static __u64 fp_f_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[2]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[3]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[4]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[5]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[6]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[7]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[8]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[9]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[10]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[11]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[12]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[13]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[14]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[15]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[16]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[17]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[18]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[19]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[20]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[21]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[22]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[23]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[24]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[25]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[26]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[27]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[28]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[29]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[30]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[31]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(fcsr),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F,
+};
+
+static __u64 fp_d_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[0]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[1]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[2]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[3]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[4]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[5]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[6]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[7]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[8]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[9]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[10]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[11]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[12]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[13]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[14]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[15]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[16]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[17]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[18]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[19]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[20]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[21]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[22]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[23]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[24]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[25]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[26]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[27]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[28]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[29]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[30]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[31]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(fcsr),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D,
+};
+
+#define BASE_SUBLIST \
+       {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
+        .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
+#define H_REGS_SUBLIST \
+       {"h", .feature = KVM_RISCV_ISA_EXT_H, .regs = h_regs, .regs_n = ARRAY_SIZE(h_regs),}
+#define ZICBOM_REGS_SUBLIST \
+       {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),}
+#define ZICBOZ_REGS_SUBLIST \
+       {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),}
+#define SVPBMT_REGS_SUBLIST \
+       {"svpbmt", .feature = KVM_RISCV_ISA_EXT_SVPBMT, .regs = svpbmt_regs, .regs_n = ARRAY_SIZE(svpbmt_regs),}
+#define SSTC_REGS_SUBLIST \
+       {"sstc", .feature = KVM_RISCV_ISA_EXT_SSTC, .regs = sstc_regs, .regs_n = ARRAY_SIZE(sstc_regs),}
+#define SVINVAL_REGS_SUBLIST \
+       {"svinval", .feature = KVM_RISCV_ISA_EXT_SVINVAL, .regs = svinval_regs, .regs_n = ARRAY_SIZE(svinval_regs),}
+#define ZIHINTPAUSE_REGS_SUBLIST \
+       {"zihintpause", .feature = KVM_RISCV_ISA_EXT_ZIHINTPAUSE, .regs = zihintpause_regs, .regs_n = ARRAY_SIZE(zihintpause_regs),}
+#define ZBA_REGS_SUBLIST \
+       {"zba", .feature = KVM_RISCV_ISA_EXT_ZBA, .regs = zba_regs, .regs_n = ARRAY_SIZE(zba_regs),}
+#define ZBB_REGS_SUBLIST \
+       {"zbb", .feature = KVM_RISCV_ISA_EXT_ZBB, .regs = zbb_regs, .regs_n = ARRAY_SIZE(zbb_regs),}
+#define ZBS_REGS_SUBLIST \
+       {"zbs", .feature = KVM_RISCV_ISA_EXT_ZBS, .regs = zbs_regs, .regs_n = ARRAY_SIZE(zbs_regs),}
+#define ZICNTR_REGS_SUBLIST \
+       {"zicntr", .feature = KVM_RISCV_ISA_EXT_ZICNTR, .regs = zicntr_regs, .regs_n = ARRAY_SIZE(zicntr_regs),}
+#define ZICSR_REGS_SUBLIST \
+       {"zicsr", .feature = KVM_RISCV_ISA_EXT_ZICSR, .regs = zicsr_regs, .regs_n = ARRAY_SIZE(zicsr_regs),}
+#define ZIFENCEI_REGS_SUBLIST \
+       {"zifencei", .feature = KVM_RISCV_ISA_EXT_ZIFENCEI, .regs = zifencei_regs, .regs_n = ARRAY_SIZE(zifencei_regs),}
+#define ZIHPM_REGS_SUBLIST \
+       {"zihpm", .feature = KVM_RISCV_ISA_EXT_ZIHPM, .regs = zihpm_regs, .regs_n = ARRAY_SIZE(zihpm_regs),}
+#define AIA_REGS_SUBLIST \
+       {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),}
+#define FP_F_REGS_SUBLIST \
+       {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \
+               .regs_n = ARRAY_SIZE(fp_f_regs),}
+#define FP_D_REGS_SUBLIST \
+       {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
+               .regs_n = ARRAY_SIZE(fp_d_regs),}
+
+static struct vcpu_reg_list h_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       H_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zicbom_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZICBOM_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zicboz_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZICBOZ_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list svpbmt_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVPBMT_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list sstc_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SSTC_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list svinval_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVINVAL_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zihintpause_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZIHINTPAUSE_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zba_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZBA_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zbb_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZBB_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zbs_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZBS_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zicntr_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZICNTR_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zicsr_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZICSR_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zifencei_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZIFENCEI_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zihpm_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZIHPM_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list aia_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       AIA_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list fp_f_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       FP_F_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list fp_d_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       FP_D_REGS_SUBLIST,
+       {0},
+       },
+};
+
+struct vcpu_reg_list *vcpu_configs[] = {
+       &h_config,
+       &zicbom_config,
+       &zicboz_config,
+       &svpbmt_config,
+       &sstc_config,
+       &svinval_config,
+       &zihintpause_config,
+       &zba_config,
+       &zbb_config,
+       &zbs_config,
+       &zicntr_config,
+       &zicsr_config,
+       &zifencei_config,
+       &zihpm_config,
+       &aia_config,
+       &fp_f_config,
+       &fp_d_config,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
index 1d73e78..c8e0a64 100644 (file)
@@ -237,8 +237,8 @@ static void test_get_cmma_basic(void)
 
        /* GET_CMMA_BITS without CMMA enabled should fail */
        rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno_out, ENXIO);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, ENXIO);
 
        enable_cmma(vm);
        vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
@@ -247,31 +247,31 @@ static void test_get_cmma_basic(void)
 
        /* GET_CMMA_BITS without migration mode and without peeking should fail */
        rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno_out, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
 
        /* GET_CMMA_BITS without migration mode and with peeking should work */
        rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
-       ASSERT_EQ(rc, 0);
-       ASSERT_EQ(errno_out, 0);
+       TEST_ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(errno_out, 0);
 
        enable_dirty_tracking(vm);
        enable_migration_mode(vm);
 
        /* GET_CMMA_BITS with invalid flags */
        rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno_out, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
 
        kvm_vm_free(vm);
 }
 
 static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
 {
-       ASSERT_EQ(vcpu->run->exit_reason, 13);
-       ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
-       ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
-       ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+       TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
 }
 
 static void test_migration_mode(void)
@@ -283,8 +283,8 @@ static void test_migration_mode(void)
 
        /* enabling migration mode on a VM without memory should fail */
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
        TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
        errno = 0;
 
@@ -304,8 +304,8 @@ static void test_migration_mode(void)
 
        /* migration mode when memslots have dirty tracking off should fail */
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
        TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
        errno = 0;
 
@@ -314,7 +314,7 @@ static void test_migration_mode(void)
 
        /* enabling migration mode should work now */
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(rc, 0);
        TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
        errno = 0;
 
@@ -350,7 +350,7 @@ static void test_migration_mode(void)
         */
        vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(rc, 0);
        TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
        errno = 0;
 
@@ -394,9 +394,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
        };
        memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
-       ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
-       ASSERT_EQ(args.start_gfn, 0);
+       TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
 
        /* ...and then - after a hole - the TEST_DATA memslot should follow */
        args = (struct kvm_s390_cmma_log){
@@ -407,9 +407,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
        };
        memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
-       ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
-       ASSERT_EQ(args.remaining, 0);
+       TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+       TEST_ASSERT_EQ(args.remaining, 0);
 
        /* ...and nothing else should be there */
        args = (struct kvm_s390_cmma_log){
@@ -420,9 +420,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
        };
        memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       ASSERT_EQ(args.count, 0);
-       ASSERT_EQ(args.start_gfn, 0);
-       ASSERT_EQ(args.remaining, 0);
+       TEST_ASSERT_EQ(args.count, 0);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
+       TEST_ASSERT_EQ(args.remaining, 0);
 }
 
 /**
@@ -498,11 +498,11 @@ static void assert_cmma_dirty(u64 first_dirty_gfn,
                              u64 dirty_gfn_count,
                              const struct kvm_s390_cmma_log *res)
 {
-       ASSERT_EQ(res->start_gfn, first_dirty_gfn);
-       ASSERT_EQ(res->count, dirty_gfn_count);
+       TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+       TEST_ASSERT_EQ(res->count, dirty_gfn_count);
        for (size_t i = 0; i < dirty_gfn_count; i++)
-               ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
-       ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+               TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
+       TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
 }
 
 static void test_get_skip_holes(void)
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c
new file mode 100644 (file)
index 0000000..84313fb
--- /dev/null
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define ICPT_INSTRUCTION 0x04
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[];
+asm("int_handler:\n"
+    "j .\n");
+
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+                                     size_t new_psw_off, uint64_t *new_psw)
+{
+       struct kvm_guest_debug debug = {};
+       struct kvm_regs regs;
+       struct kvm_vm *vm;
+       char *lowcore;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       lowcore = addr_gpa2hva(vm, 0); /* guest physical 0 -> host pointer (per helper name) */
+       new_psw[0] = (*vcpu)->run->psw_mask; /* mask word: copy of the vCPU's current PSW mask */
+       new_psw[1] = (uint64_t)int_handler; /* address word: the int_handler label */
+       memcpy(lowcore + new_psw_off, new_psw, 16); /* 16 bytes == both uint64_t PSW words */
+       vcpu_regs_get(*vcpu, &regs);
+       regs.gprs[2] = -1; /* %r2 is the operand of the ISKE guest code in this file */
+       vcpu_regs_set(*vcpu, &regs);
+       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+       vcpu_guest_debug_set(*vcpu, &debug);
+       vcpu_run(*vcpu); /* single run; callers check the resulting exit and PSW */
+
+       return vm;
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2];
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+    ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+    "j .\n");
+
+static void test_step_pgm(void)
+{
+       test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+    "diag %r0,%r0,0\n"
+    "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+       struct kvm_s390_irq irq = {
+               .type = KVM_S390_PROGRAM_INT,
+               .u.pgm.code = PGM_SPECIFICATION,
+       };
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2];
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+                            __LC_PGM_NEW_PSW, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
+       vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+    "iske %r2,%r2\n"
+    "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+       test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+    "lctl %c0,%c0,1\n"
+    "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+       test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+    "svc 0\n"
+    "j .\n");
+
+static void test_step_svc(void)
+{
+       test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+       const char *name;
+       void (*test)(void);
+} testlist[] = {
+       { "single-step pgm", test_step_pgm },
+       { "single-step pgm caused by diag", test_step_pgm_diag },
+       { "single-step pgm caused by iske", test_step_pgm_iske },
+       { "single-step pgm caused by lctl", test_step_pgm_lctl },
+       { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{
+       int idx;
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(testlist));
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               testlist[idx].test();
+               ksft_test_result_pass("%s\n", testlist[idx].name);
+       }
+       ksft_finished();
+}
index 8e4b94d..bb3ca9a 100644 (file)
@@ -4,7 +4,6 @@
  *
  * Copyright (C) 2019, Red Hat, Inc.
  */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -279,10 +278,10 @@ enum stage {
        vcpu_run(__vcpu);                                               \
        get_ucall(__vcpu, &uc);                                         \
        if (uc.cmd == UCALL_ABORT) {                                    \
-               REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu");           \
+               REPORT_GUEST_ASSERT(uc);                                \
        }                                                               \
-       ASSERT_EQ(uc.cmd, UCALL_SYNC);                                  \
-       ASSERT_EQ(uc.args[1], __stage);                                 \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                             \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                            \
 })                                                                     \
 
 static void prepare_mem12(void)
@@ -469,7 +468,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val)
        case 16:
                return val;
        }
-       GUEST_ASSERT_1(false, "Invalid size");
+       GUEST_FAIL("Invalid size = %u", size);
        return 0;
 }
 
@@ -598,7 +597,7 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t
                        return ret;
                }
        }
-       GUEST_ASSERT_1(false, "Invalid size");
+       GUEST_FAIL("Invalid size = %u", size);
        return 0;
 }
 
@@ -808,7 +807,7 @@ static void test_termination(void)
        HOST_SYNC(t.vcpu, STAGE_IDLED);
        MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
        /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
-       ASSERT_EQ(teid & teid_mask, 0);
+       TEST_ASSERT_EQ(teid & teid_mask, 0);
 
        kvm_vm_free(t.kvm_vm);
 }
index a9a0b76..c73f948 100644 (file)
@@ -4,7 +4,6 @@
  *
  * Copyright IBM Corp. 2021
  */
-
 #include <sys/mman.h>
 #include "test_util.h"
 #include "kvm_util.h"
@@ -156,7 +155,9 @@ static enum stage perform_next_stage(int *i, bool mapped_0)
                       !mapped_0;
                if (!skip) {
                        result = test_protection(tests[*i].addr, tests[*i].key);
-                       GUEST_ASSERT_2(result == tests[*i].expected, *i, result);
+                       __GUEST_ASSERT(result == tests[*i].expected,
+                                      "Wanted %u, got %u, for i = %u",
+                                      tests[*i].expected, result, *i);
                }
        }
        return stage;
@@ -190,9 +191,9 @@ static void guest_code(void)
        vcpu_run(__vcpu);                                       \
        get_ucall(__vcpu, &uc);                                 \
        if (uc.cmd == UCALL_ABORT)                              \
-               REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu");   \
-       ASSERT_EQ(uc.cmd, UCALL_SYNC);                          \
-       ASSERT_EQ(uc.args[1], __stage);                         \
+               REPORT_GUEST_ASSERT(uc);                        \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                     \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                    \
 })
 
 #define HOST_SYNC(vcpu, stage)                 \
index a849ce2..b329601 100644 (file)
@@ -88,7 +88,7 @@ static void *vcpu_worker(void *data)
        }
 
        if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
-               REPORT_GUEST_ASSERT_1(uc, "val = %lu");
+               REPORT_GUEST_ASSERT(uc);
 
        return NULL;
 }
@@ -156,19 +156,22 @@ static void guest_code_move_memory_region(void)
         * window where the memslot is invalid is usually quite small.
         */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+       __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+                      "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
 
        /* Spin until the misaligning memory region move completes. */
        val = guest_spin_on_val(MMIO_VAL);
-       GUEST_ASSERT_1(val == 1 || val == 0, val);
+       __GUEST_ASSERT(val == 1 || val == 0,
+                      "Expected '0' or '1' (no MMIO), got '%llx'", val);
 
        /* Spin until the memory region starts to get re-aligned. */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+       __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+                      "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
 
        /* Spin until the re-aligning memory region move completes. */
        val = guest_spin_on_val(MMIO_VAL);
-       GUEST_ASSERT_1(val == 1, val);
+       GUEST_ASSERT_EQ(val, 1);
 
        GUEST_DONE();
 }
@@ -224,15 +227,15 @@ static void guest_code_delete_memory_region(void)
 
        /* Spin until the memory region is deleted. */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == MMIO_VAL, val);
+       GUEST_ASSERT_EQ(val, MMIO_VAL);
 
        /* Spin until the memory region is recreated. */
        val = guest_spin_on_val(MMIO_VAL);
-       GUEST_ASSERT_1(val == 0, val);
+       GUEST_ASSERT_EQ(val, 0);
 
        /* Spin until the memory region is deleted. */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == MMIO_VAL, val);
+       GUEST_ASSERT_EQ(val, MMIO_VAL);
 
        asm("1:\n\t"
            ".pushsection .rodata\n\t"
@@ -249,7 +252,7 @@ static void guest_code_delete_memory_region(void)
            "final_rip_end: .quad 1b\n\t"
            ".popsection");
 
-       GUEST_ASSERT_1(0, 0);
+       GUEST_ASSERT(0);
 }
 
 static void test_delete_memory_region(void)
index c87f387..171adfb 100644 (file)
@@ -31,8 +31,8 @@ static uint64_t guest_stolen_time[NR_VCPUS];
 static void check_status(struct kvm_steal_time *st)
 {
        GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
-       GUEST_ASSERT(READ_ONCE(st->flags) == 0);
-       GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0);
 }
 
 static void guest_code(int cpu)
@@ -40,7 +40,7 @@ static void guest_code(int cpu)
        struct kvm_steal_time *st = st_gva[cpu];
        uint32_t version;
 
-       GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+       GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
 
        memset(st, 0, sizeof(*st));
        GUEST_SYNC(0);
@@ -122,8 +122,8 @@ static int64_t smccc(uint32_t func, uint64_t arg)
 
 static void check_status(struct st_time *st)
 {
-       GUEST_ASSERT(READ_ONCE(st->rev) == 0);
-       GUEST_ASSERT(READ_ONCE(st->attr) == 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->rev), 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->attr), 0);
 }
 
 static void guest_code(int cpu)
@@ -132,15 +132,15 @@ static void guest_code(int cpu)
        int64_t status;
 
        status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
-       GUEST_ASSERT(status == 0);
+       GUEST_ASSERT_EQ(status, 0);
        status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES);
-       GUEST_ASSERT(status == 0);
+       GUEST_ASSERT_EQ(status, 0);
        status = smccc(PV_TIME_FEATURES, PV_TIME_ST);
-       GUEST_ASSERT(status == 0);
+       GUEST_ASSERT_EQ(status, 0);
 
        status = smccc(PV_TIME_ST, 0);
-       GUEST_ASSERT(status != -1);
-       GUEST_ASSERT(status == (ulong)st_gva[cpu]);
+       GUEST_ASSERT_NE(status, -1);
+       GUEST_ASSERT_EQ(status, (ulong)st_gva[cpu]);
 
        st = (struct st_time *)status;
        GUEST_SYNC(0);
index d3c3aa9..3b34d81 100644 (file)
@@ -35,10 +35,10 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
                        guest_cpuid->entries[i].index,
                        &eax, &ebx, &ecx, &edx);
 
-               GUEST_ASSERT(eax == guest_cpuid->entries[i].eax &&
-                            ebx == guest_cpuid->entries[i].ebx &&
-                            ecx == guest_cpuid->entries[i].ecx &&
-                            edx == guest_cpuid->entries[i].edx);
+               GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+               GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+               GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+               GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
        }
 
 }
@@ -51,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid)
 
        GUEST_SYNC(2);
 
-       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001);
+       GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
 
        GUEST_DONE();
 }
@@ -116,7 +116,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
        case UCALL_DONE:
                return;
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+               REPORT_GUEST_ASSERT(uc);
        default:
                TEST_ASSERT(false, "Unexpected exit: %s",
                            exit_reason_str(vcpu->run->exit_reason));
index beb7e2c..634c6bf 100644 (file)
@@ -72,7 +72,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
 
                vcpu_run(vcpu);
 
-               ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+               TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
 
                vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
 
@@ -179,12 +179,12 @@ static void run_test(enum vm_guest_mode mode, void *unused)
         * with that capability.
         */
        if (dirty_log_manual_caps) {
-               ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
-               ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
-               ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+               TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+               TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
        } else {
-               ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
-               ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
        }
 
        /*
@@ -192,9 +192,9 @@ static void run_test(enum vm_guest_mode mode, void *unused)
         * memory again, the page counts should be the same as they were
         * right after initial population of memory.
         */
-       ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
-       ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
-       ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+       TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
+       TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+       TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
 }
 
 static void help(char *name)
index e334844..6c2e5e0 100644 (file)
@@ -35,7 +35,7 @@ int main(int argc, char *argv[])
        vcpu_run(vcpu);
        handle_flds_emulation_failure_exit(vcpu);
        vcpu_run(vcpu);
-       ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
 
        kvm_vm_free(vm);
        return 0;
index 73af44d..e036db1 100644 (file)
@@ -8,7 +8,6 @@
  * Copyright 2022 Google LLC
  * Author: Vipin Sharma <vipinsh@google.com>
  */
-
 #include "kvm_util.h"
 #include "processor.h"
 #include "hyperv.h"
@@ -84,7 +83,7 @@ int main(void)
 
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "arg1 = %ld, arg2 = %ld");
+               REPORT_GUEST_ASSERT(uc);
                break;
        case UCALL_DONE:
                break;
index 78606de..9f28aa2 100644 (file)
@@ -53,16 +53,21 @@ static void guest_msr(struct msr_data *msr)
                vector = rdmsr_safe(msr->idx, &msr_val);
 
        if (msr->fault_expected)
-               GUEST_ASSERT_3(vector == GP_VECTOR, msr->idx, vector, GP_VECTOR);
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
+                              msr->idx, msr->write ? "WR" : "RD", vector);
        else
-               GUEST_ASSERT_3(!vector, msr->idx, vector, 0);
+               __GUEST_ASSERT(!vector,
+                              "Expected success on %sMSR(0x%x), got vector '0x%x'",
+                              msr->idx, msr->write ? "WR" : "RD", vector);
 
        if (vector || is_write_only_msr(msr->idx))
                goto done;
 
        if (msr->write)
-               GUEST_ASSERT_3(msr_val == msr->write_val, msr->idx,
-                              msr_val, msr->write_val);
+               __GUEST_ASSERT(!vector,
+                              "WRMSR(0x%x) to '0x%llx', RDMSR read '0x%llx'",
+                              msr->idx, msr->write_val, msr_val);
 
        /* Invariant TSC bit appears when TSC invariant control MSR is written to */
        if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
@@ -82,7 +87,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
        u64 res, input, output;
        uint8_t vector;
 
-       GUEST_ASSERT(hcall->control);
+       GUEST_ASSERT_NE(hcall->control, 0);
 
        wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
        wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
@@ -96,10 +101,14 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 
        vector = __hyperv_hypercall(hcall->control, input, output, &res);
        if (hcall->ud_expected) {
-               GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
+               __GUEST_ASSERT(vector == UD_VECTOR,
+                              "Expected #UD for control '%u', got vector '0x%x'",
+                              hcall->control, vector);
        } else {
-               GUEST_ASSERT_2(!vector, hcall->control, vector);
-               GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res);
+               __GUEST_ASSERT(!vector,
+                              "Expected no exception for control '%u', got vector '0x%x'",
+                              hcall->control, vector);
+               GUEST_ASSERT_EQ(res, hcall->expect);
        }
 
        GUEST_DONE();
@@ -495,7 +504,7 @@ static void guest_test_msrs_access(void)
 
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_3(uc, "MSR = %lx, arg1 = %lx, arg2 = %lx");
+                       REPORT_GUEST_ASSERT(uc);
                        return;
                case UCALL_DONE:
                        break;
@@ -665,7 +674,7 @@ static void guest_test_hcalls_access(void)
 
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx");
+                       REPORT_GUEST_ASSERT(uc);
                        return;
                case UCALL_DONE:
                        break;
index f774a9e..9e2879a 100644 (file)
@@ -46,10 +46,10 @@ static void test_msr(struct msr_data *msr)
        PR_MSR(msr);
 
        vector = rdmsr_safe(msr->idx, &ignored);
-       GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
 
        vector = wrmsr_safe(msr->idx, 0);
-       GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
 }
 
 struct hcall_data {
@@ -77,7 +77,7 @@ static void test_hcall(struct hcall_data *hc)
 
        PR_HCALL(hc);
        r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
-       GUEST_ASSERT(r == -KVM_ENOSYS);
+       GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
 }
 
 static void guest_main(void)
@@ -125,7 +125,7 @@ static void enter_guest(struct kvm_vcpu *vcpu)
                        pr_hcall(&uc);
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_1(uc, "vector = %lu");
+                       REPORT_GUEST_ASSERT(uc);
                        return;
                case UCALL_DONE:
                        return;
index 7281264..80aa3d8 100644 (file)
@@ -16,14 +16,25 @@ enum monitor_mwait_testcases {
        MWAIT_DISABLED = BIT(2),
 };
 
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector)             \
+do {                                                                   \
+       bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) &&      \
+                           ((testcase) & MWAIT_DISABLED);              \
+                                                                       \
+       if (fault_wanted)                                               \
+               __GUEST_ASSERT((vector) == UD_VECTOR,                   \
+                              "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", testcase, vector); \
+       else                                                            \
+               __GUEST_ASSERT(!(vector),                               \
+                              "Expected success on " insn " for testcase '0x%x', got '0x%x'", testcase, vector); \
+} while (0)
+
 static void guest_monitor_wait(int testcase)
 {
-       /*
-        * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD,
-        * in all other scenarios KVM should emulate them as nops.
-        */
-       bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) &&
-                           (testcase & MWAIT_DISABLED);
        u8 vector;
 
        GUEST_SYNC(testcase);
@@ -33,16 +44,10 @@ static void guest_monitor_wait(int testcase)
         * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
         */
        vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       if (fault_wanted)
-               GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
-       else
-               GUEST_ASSERT_2(!vector, testcase, vector);
+       GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
 
        vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       if (fault_wanted)
-               GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
-       else
-               GUEST_ASSERT_2(!vector, testcase, vector);
+       GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
 }
 
 static void guest_code(void)
@@ -85,7 +90,7 @@ int main(int argc, char *argv[])
                        testcase = uc.args[1];
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld");
+                       REPORT_GUEST_ASSERT(uc);
                        goto done;
                case UCALL_DONE:
                        goto done;
index 6502aa2..3670331 100644 (file)
@@ -180,9 +180,7 @@ static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
                            "Expected L2 to ask for %d, L2 says it's done", vector);
                break;
        case UCALL_ABORT:
-               TEST_FAIL("%s at %s:%ld (0x%lx != 0x%lx)",
-                         (const char *)uc.args[0], __FILE__, uc.args[1],
-                         uc.args[2], uc.args[3]);
+               REPORT_GUEST_ASSERT(uc);
                break;
        default:
                TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
@@ -247,12 +245,12 @@ int main(int argc, char *argv[])
 
        /* Verify the pending events comes back out the same as it went in. */
        vcpu_events_get(vcpu, &events);
-       ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
-                 KVM_VCPUEVENT_VALID_PAYLOAD);
-       ASSERT_EQ(events.exception.pending, true);
-       ASSERT_EQ(events.exception.nr, SS_VECTOR);
-       ASSERT_EQ(events.exception.has_error_code, true);
-       ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+       TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+                       KVM_VCPUEVENT_VALID_PAYLOAD);
+       TEST_ASSERT_EQ(events.exception.pending, true);
+       TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+       TEST_ASSERT_EQ(events.exception.has_error_code, true);
+       TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
 
        /*
         * Run for real with the pending #SS, L1 should get a VM-Exit due to
index 40507ed..283cc55 100644 (file)
 #define ARCH_PERFMON_BRANCHES_RETIRED          5
 
 #define NUM_BRANCHES 42
+#define INTEL_PMC_IDX_FIXED            32
+
+/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
+#define MAX_FILTER_EVENTS              300
+#define MAX_TEST_EVENTS                10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION                (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS                 (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS               (MAX_FILTER_EVENTS + 1)
 
 /*
  * This is how the event selector and unit mask are stored in an AMD
 
 #define INST_RETIRED EVENT(0xc0, 0)
 
+struct __kvm_pmu_event_filter {
+       __u32 action;
+       __u32 nevents;
+       __u32 fixed_counter_bitmap;
+       __u32 flags;
+       __u32 pad[4];
+       __u64 events[MAX_FILTER_EVENTS];
+};
+
 /*
  * This event list comprises Intel's eight architectural events plus
  * AMD's "retired branch instructions" for Zen[123] (and possibly
  * other AMD CPUs).
  */
-static const uint64_t event_list[] = {
-       EVENT(0x3c, 0),
-       INST_RETIRED,
-       EVENT(0x3c, 1),
-       EVENT(0x2e, 0x4f),
-       EVENT(0x2e, 0x41),
-       EVENT(0xc4, 0),
-       EVENT(0xc5, 0),
-       EVENT(0xa4, 1),
-       AMD_ZEN_BR_RETIRED,
+static const struct __kvm_pmu_event_filter base_event_filter = {
+       .nevents = ARRAY_SIZE(base_event_filter.events),
+       .events = {
+               EVENT(0x3c, 0),
+               INST_RETIRED,
+               EVENT(0x3c, 1),
+               EVENT(0x2e, 0x4f),
+               EVENT(0x2e, 0x41),
+               EVENT(0xc4, 0),
+               EVENT(0xc5, 0),
+               EVENT(0xa4, 1),
+               AMD_ZEN_BR_RETIRED,
+       },
 };
 
 struct {
@@ -225,48 +246,11 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
        return !r;
 }
 
-static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
-{
-       struct kvm_pmu_event_filter *f;
-       int size = sizeof(*f) + nevents * sizeof(f->events[0]);
-
-       f = malloc(size);
-       TEST_ASSERT(f, "Out of memory");
-       memset(f, 0, size);
-       f->nevents = nevents;
-       return f;
-}
-
-
-static struct kvm_pmu_event_filter *
-create_pmu_event_filter(const uint64_t event_list[], int nevents,
-                       uint32_t action, uint32_t flags)
-{
-       struct kvm_pmu_event_filter *f;
-       int i;
-
-       f = alloc_pmu_event_filter(nevents);
-       f->action = action;
-       f->flags = flags;
-       for (i = 0; i < nevents; i++)
-               f->events[i] = event_list[i];
-
-       return f;
-}
-
-static struct kvm_pmu_event_filter *event_filter(uint32_t action)
-{
-       return create_pmu_event_filter(event_list,
-                                      ARRAY_SIZE(event_list),
-                                      action, 0);
-}
-
 /*
  * Remove the first occurrence of 'event' (if any) from the filter's
  * event list.
  */
-static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
-                                                uint64_t event)
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
 {
        bool found = false;
        int i;
@@ -279,7 +263,6 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
        }
        if (found)
                f->nevents--;
-       return f;
 }
 
 #define ASSERT_PMC_COUNTING_INSTRUCTIONS()                                             \
@@ -315,66 +298,73 @@ static void test_without_filter(struct kvm_vcpu *vcpu)
 }
 
 static void test_with_filter(struct kvm_vcpu *vcpu,
-                            struct kvm_pmu_event_filter *f)
+                            struct __kvm_pmu_event_filter *__f)
 {
+       struct kvm_pmu_event_filter *f = (void *)__f;
+
        vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
        run_vcpu_and_sync_pmc_results(vcpu);
 }
 
 static void test_amd_deny_list(struct kvm_vcpu *vcpu)
 {
-       uint64_t event = EVENT(0x1C2, 0);
-       struct kvm_pmu_event_filter *f;
+       struct __kvm_pmu_event_filter f = {
+               .action = KVM_PMU_EVENT_DENY,
+               .nevents = 1,
+               .events = {
+                       EVENT(0x1C2, 0),
+               },
+       };
 
-       f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0);
-       test_with_filter(vcpu, f);
-       free(f);
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
 }
 
 static void test_member_deny_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+       struct __kvm_pmu_event_filter f = base_event_filter;
 
-       test_with_filter(vcpu, f);
-       free(f);
+       f.action = KVM_PMU_EVENT_DENY;
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
 }
 
 static void test_member_allow_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+       struct __kvm_pmu_event_filter f = base_event_filter;
 
-       test_with_filter(vcpu, f);
-       free(f);
+       f.action = KVM_PMU_EVENT_ALLOW;
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
 }
 
 static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+       struct __kvm_pmu_event_filter f = base_event_filter;
 
-       remove_event(f, INST_RETIRED);
-       remove_event(f, INTEL_BR_RETIRED);
-       remove_event(f, AMD_ZEN_BR_RETIRED);
-       test_with_filter(vcpu, f);
-       free(f);
+       f.action = KVM_PMU_EVENT_DENY;
+
+       remove_event(&f, INST_RETIRED);
+       remove_event(&f, INTEL_BR_RETIRED);
+       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
 }
 
 static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_ALLOW;
 
-       remove_event(f, INST_RETIRED);
-       remove_event(f, INTEL_BR_RETIRED);
-       remove_event(f, AMD_ZEN_BR_RETIRED);
-       test_with_filter(vcpu, f);
-       free(f);
+       remove_event(&f, INST_RETIRED);
+       remove_event(&f, INTEL_BR_RETIRED);
+       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
 }
@@ -569,19 +559,16 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu,
                                   const uint64_t masked_events[],
                                   const int nmasked_events)
 {
-       struct kvm_pmu_event_filter *f;
+       struct __kvm_pmu_event_filter f = {
+               .nevents = nmasked_events,
+               .action = KVM_PMU_EVENT_ALLOW,
+               .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+       };
 
-       f = create_pmu_event_filter(masked_events, nmasked_events,
-                                   KVM_PMU_EVENT_ALLOW,
-                                   KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
-       test_with_filter(vcpu, f);
-       free(f);
+       memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+       test_with_filter(vcpu, &f);
 }
 
-/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
-#define MAX_FILTER_EVENTS      300
-#define MAX_TEST_EVENTS                10
-
 #define ALLOW_LOADS            BIT(0)
 #define ALLOW_STORES           BIT(1)
 #define ALLOW_LOADS_STORES     BIT(2)
@@ -753,21 +740,33 @@ static void test_masked_events(struct kvm_vcpu *vcpu)
        run_masked_events_tests(vcpu, events, nevents);
 }
 
-static int run_filter_test(struct kvm_vcpu *vcpu, const uint64_t *events,
-                          int nevents, uint32_t flags)
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+                               struct __kvm_pmu_event_filter *__f)
 {
-       struct kvm_pmu_event_filter *f;
-       int r;
+       struct kvm_pmu_event_filter *f = (void *)__f;
 
-       f = create_pmu_event_filter(events, nevents, KVM_PMU_EVENT_ALLOW, flags);
-       r = __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
-       free(f);
+       return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
 
-       return r;
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+                                      uint32_t flags, uint32_t action)
+{
+       struct __kvm_pmu_event_filter f = {
+               .nevents = 1,
+               .flags = flags,
+               .action = action,
+               .events = {
+                       event,
+               },
+       };
+
+       return set_pmu_event_filter(vcpu, &f);
 }
 
 static void test_filter_ioctl(struct kvm_vcpu *vcpu)
 {
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct __kvm_pmu_event_filter f;
        uint64_t e = ~0ul;
        int r;
 
@@ -775,15 +774,144 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu)
         * Unfortunately having invalid bits set in event data is expected to
         * pass when flags == 0 (bits other than eventsel+umask).
         */
-       r = run_filter_test(vcpu, &e, 1, 0);
+       r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
        TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
 
-       r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
        TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
 
        e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
-       r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
        TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+       f = base_event_filter;
+       f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+       f = base_event_filter;
+       f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+       f = base_event_filter;
+       f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+       f = base_event_filter;
+       f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx)
+{
+       for (;;) {
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0);
+
+               /* Only OS_EN bit is enabled for fixed counter[idx]. */
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL,
+                     BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx));
+               __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx));
+       }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+                                              uint32_t action, uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = {
+               .action = action,
+               .fixed_counter_bitmap = bitmap,
+       };
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+                                                  uint32_t action,
+                                                  uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = action;
+       f.fixed_counter_bitmap = bitmap;
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+/* Verify filtering of fixed counter @idx for every fixed_counter_bitmap value. */
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+                                       uint8_t nr_fixed_counters)
+{
+       uint32_t bitmap;
+       uint64_t count;
+
+       TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+                   "Invalid nr_fixed_counters");
+
+       /*
+        * Check the fixed performance counter can count normally when KVM
+        * userspace doesn't set any pmu filter.
+        */
+       count = run_vcpu_to_sync(vcpu);
+       TEST_ASSERT(count, "Unexpected count value: %lu", count);
+
+       /* Walk all 2^nr_fixed_counters bitmap values, not just single-bit masks. */
+       for (bitmap = 0; bitmap < BIT(nr_fixed_counters); bitmap++) {
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+               /*
+                * Check that fixed_counter_bitmap has higher priority than
+                * events[] when both are set.
+                */
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_ALLOW,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_DENY,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+       }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       uint8_t idx;
+
+       /*
+        * Check that pmu_event_filter works as expected when it's applied to
+        * fixed performance counters.
+        */
+       for (idx = 0; idx < nr_fixed_counters; idx++) {
+               vm = vm_create_with_one_vcpu(&vcpu,
+                                            intel_run_fixed_counter_guest_code);
+               vcpu_args_set(vcpu, 1, idx);
+               __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+               kvm_vm_free(vm);
+       }
 }
 
 int main(int argc, char *argv[])
@@ -829,6 +957,7 @@ int main(int argc, char *argv[])
        kvm_vm_free(vm);
 
        test_pmu_config_disable(guest_code);
+       test_fixed_counter_bitmap();
 
        return 0;
 }
index 4c416eb..cbc92a8 100644 (file)
@@ -57,7 +57,7 @@ int main(void)
        for (i = 0; i < KVM_MAX_VCPUS; i++)
                vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
 
-       ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
 
        vcpuN = vcpus[KVM_MAX_VCPUS - 1];
        for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
@@ -65,8 +65,8 @@ int main(void)
                vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
        }
 
-       ASSERT_EQ(pthread_cancel(thread), 0);
-       ASSERT_EQ(pthread_join(thread, NULL), 0);
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
 
        kvm_vm_free(vm);
 
index b25d755..366cf18 100644 (file)
@@ -20,7 +20,7 @@ static void guest_bsp_vcpu(void *arg)
 {
        GUEST_SYNC(1);
 
-       GUEST_ASSERT(get_bsp_flag() != 0);
+       GUEST_ASSERT_NE(get_bsp_flag(), 0);
 
        GUEST_DONE();
 }
@@ -29,7 +29,7 @@ static void guest_not_bsp_vcpu(void *arg)
 {
        GUEST_SYNC(1);
 
-       GUEST_ASSERT(get_bsp_flag() == 0);
+       GUEST_ASSERT_EQ(get_bsp_flag(), 0);
 
        GUEST_DONE();
 }
@@ -65,7 +65,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu)
                                        stage);
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+                       REPORT_GUEST_ASSERT(uc);
                default:
                        TEST_ASSERT(false, "Unexpected exit: %s",
                                    exit_reason_str(vcpu->run->exit_reason));
index 4e24797..7ee4449 100644 (file)
@@ -8,7 +8,6 @@
  *   Copyright (C) 2021, Red Hat, Inc.
  *
  */
-
 #include <stdatomic.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -34,13 +33,12 @@ static void l2_guest_code_int(void);
 static void guest_int_handler(struct ex_regs *regs)
 {
        int_fired++;
-       GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int,
-                      regs->rip, (unsigned long)l2_guest_code_int);
+       GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
 }
 
 static void l2_guest_code_int(void)
 {
-       GUEST_ASSERT_1(int_fired == 1, int_fired);
+       GUEST_ASSERT_EQ(int_fired, 1);
 
        /*
          * Same as the vmmcall() function, but with a ud2 sneaked after the
@@ -53,7 +51,7 @@ static void l2_guest_code_int(void)
                              : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
                                "r10", "r11", "r12", "r13", "r14", "r15");
 
-       GUEST_ASSERT_1(bp_fired == 1, bp_fired);
+       GUEST_ASSERT_EQ(bp_fired, 1);
        hlt();
 }
 
@@ -66,9 +64,9 @@ static void guest_nmi_handler(struct ex_regs *regs)
 
        if (nmi_stage_get() == 1) {
                vmmcall();
-               GUEST_ASSERT(false);
+               GUEST_FAIL("Unexpected resume after VMMCALL");
        } else {
-               GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get());
+               GUEST_ASSERT_EQ(nmi_stage_get(), 3);
                GUEST_DONE();
        }
 }
@@ -104,7 +102,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
        }
 
        run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+                      "Expected VMMCALL #VMEXIT, got '0x%x', info1 = '0x%llx', info2 = '0x%llx'",
                       vmcb->control.exit_code,
                       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
 
@@ -112,7 +111,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
                clgi();
                x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
 
-               GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get());
+               GUEST_ASSERT_EQ(nmi_stage_get(), 1);
                nmi_stage_inc();
 
                stgi();
@@ -133,7 +132,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
        vmcb->control.next_rip = vmcb->save.rip + 2;
 
        run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT,
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+                      "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%llx', info2 = '0x%llx'",
                       vmcb->control.exit_code,
                       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
 
@@ -185,7 +185,7 @@ static void run_test(bool is_nmi)
 
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx");
+               REPORT_GUEST_ASSERT(uc);
                break;
                /* NOT REACHED */
        case UCALL_DONE:
index 2da89fd..00965ba 100644 (file)
@@ -15,6 +15,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/ioctl.h>
+#include <pthread.h>
 
 #include "test_util.h"
 #include "kvm_util.h"
@@ -80,6 +81,133 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
 #define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
 #define INVALID_SYNC_FIELD 0x80000000
 
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       WRITE_ONCE(events->exception.nr, UD_VECTOR);
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.injected, 1);
+               WRITE_ONCE(events->exception.pending, 1);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events.  KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.nr, UD_VECTOR);
+               WRITE_ONCE(events->exception.pending, 1);
+               WRITE_ONCE(events->exception.nr, 255);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS.  EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU heavily relies on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       __u64 *cr4 = &run->s.regs.sregs.cr4;
+       __u64 pae_enabled = *cr4;
+       __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+               WRITE_ONCE(*cr4, pae_enabled);
+               asm volatile(".rept 512\n\t"
+                            "nop\n\t"
+                            ".endr");
+               WRITE_ONCE(*cr4, pae_disabled);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+static void race_sync_regs(void *racer)
+{
+       const time_t TIMEOUT = 2; /* seconds, roughly */
+       struct kvm_x86_state *state;
+       struct kvm_translation tr;
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       pthread_t thread;
+       time_t t;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+       vcpu_run(vcpu);
+       run->kvm_valid_regs = 0;
+
+       /* Save state *before* spawning the thread that mucks with vCPU state. */
+       state = vcpu_save_state(vcpu);
+
+       /*
+        * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
+        * should already be set in guest state.
+        */
+       TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+                   (run->s.regs.sregs.efer & EFER_LME),
+                   "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+                   !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+                   !!(run->s.regs.sregs.efer & EFER_LME));
+
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+               /*
+                * Reload known good state if the vCPU triple faults, e.g. due
+                * to the unhandled #GPs being injected.  VMX preserves state
+                * on shutdown, but SVM synthesizes an INIT as the VMCB state
+                * is architecturally undefined on triple fault.
+                */
+               if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+                       vcpu_load_state(vcpu, state);
+
+               if (racer == race_sregs_cr4) {
+                       tr = (struct kvm_translation) { .linear_address = 0 };
+                       __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+               }
+       }
+
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+       kvm_x86_state_cleanup(state);
+       kvm_vm_free(vm);
+}
+
 int main(int argc, char *argv[])
 {
        struct kvm_vcpu *vcpu;
@@ -218,5 +346,9 @@ int main(int argc, char *argv[])
 
        kvm_vm_free(vm);
 
+       race_sync_regs(race_sregs_cr4);
+       race_sync_regs(race_events_exc);
+       race_sync_regs(race_events_inj_pen);
+
        return 0;
 }
index c9f6770..12b0964 100644 (file)
@@ -84,7 +84,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
                ksft_test_result_pass("stage %d passed\n", stage + 1);
                return;
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+               REPORT_GUEST_ASSERT(uc);
        default:
                TEST_ASSERT(false, "Unexpected exit: %s",
                            exit_reason_str(vcpu->run->exit_reason));
@@ -103,39 +103,39 @@ int main(void)
        vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
        val = 0;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
        run_vcpu(vcpu, 1);
        val = 1ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
        run_vcpu(vcpu, 2);
        val = 2ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /*
         * Host: writes to MSR_IA32_TSC set the host-side offset
         * and therefore do not change MSR_IA32_TSC_ADJUST.
         */
        vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
        run_vcpu(vcpu, 3);
 
        /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
        vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
 
        /* Restore previous value.  */
        vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /*
         * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
@@ -143,8 +143,8 @@ int main(void)
         */
        run_vcpu(vcpu, 4);
        val = 3ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /*
         * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
@@ -152,8 +152,8 @@ int main(void)
         */
        run_vcpu(vcpu, 5);
        val = 4ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
 
        kvm_vm_free(vm);
 
index 0cb51fa..255c50b 100644 (file)
@@ -20,8 +20,8 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count)
                end = (unsigned long)buffer + 8192;
 
        asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
-       GUEST_ASSERT_1(count == 0, count);
-       GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end);
+       GUEST_ASSERT_EQ(count, 0);
+       GUEST_ASSERT_EQ((unsigned long)buffer, end);
 }
 
 static void guest_code(void)
@@ -43,7 +43,9 @@ static void guest_code(void)
        memset(buffer, 0, sizeof(buffer));
        guest_ins_port80(buffer, 8192);
        for (i = 0; i < 8192; i++)
-               GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]);
+               __GUEST_ASSERT(buffer[i] == 0xaa,
+                              "Expected '0xaa', got '0x%x' at buffer[%u]",
+                              buffer[i], i);
 
        GUEST_DONE();
 }
@@ -91,7 +93,7 @@ int main(int argc, char *argv[])
        case UCALL_DONE:
                break;
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx");
+               REPORT_GUEST_ASSERT(uc);
        default:
                TEST_FAIL("Unknown ucall %lu", uc.cmd);
        }
index be0bdb8..a9b827c 100644 (file)
@@ -50,7 +50,7 @@ static void set_timer(void)
        timer.it_value.tv_sec  = 0;
        timer.it_value.tv_usec = 200;
        timer.it_interval = timer.it_value;
-       ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+       TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
 }
 
 static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
index 4c90f76..ebbcb0a 100644 (file)
@@ -10,7 +10,6 @@
  * and check it can be retrieved with KVM_GET_MSR, also test
  * the invalid LBR formats are rejected.
  */
-
 #define _GNU_SOURCE /* for program_invocation_short_name */
 #include <sys/ioctl.h>
 
@@ -52,23 +51,24 @@ static const union perf_capabilities format_caps = {
        .pebs_format = -1,
 };
 
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+       uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+
+       __GUEST_ASSERT(vector == GP_VECTOR,
+                      "Expected #GP for value '0x%llx', got vector '0x%x'",
+                      val, vector);
+}
+
 static void guest_code(uint64_t current_val)
 {
-       uint8_t vector;
        int i;
 
-       vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val);
-       GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector);
-
-       vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0);
-       GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector);
+       guest_test_perf_capabilities_gp(current_val);
+       guest_test_perf_capabilities_gp(0);
 
-       for (i = 0; i < 64; i++) {
-               vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES,
-                                   current_val ^ BIT_ULL(i));
-               GUEST_ASSERT_2(vector == GP_VECTOR,
-                              current_val ^ BIT_ULL(i), vector);
-       }
+       for (i = 0; i < 64; i++)
+               guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
 
        GUEST_DONE();
 }
@@ -95,7 +95,7 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
 
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu");
+               REPORT_GUEST_ASSERT(uc);
                break;
        case UCALL_DONE:
                break;
@@ -103,7 +103,8 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
                TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
        }
 
-       ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities);
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+                       host_cap.capabilities);
 
        vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
 
index 396c13f..ab75b87 100644 (file)
@@ -65,17 +65,17 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
        vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
 
        vcpu_run(vcpu);
-       ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
-       ASSERT_EQ(uc.args[1], val);
+       TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+       TEST_ASSERT_EQ(uc.args[1], val);
 
        vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
        icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
              (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
        if (!x->is_x2apic) {
                val &= (-1u | (0xffull << (32 + 24)));
-               ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+               TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
        } else {
-               ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+               TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
        }
 }
 
index 905bd5a..77d04a7 100644 (file)
@@ -4,7 +4,6 @@
  *
  * Copyright (C) 2022, Google LLC.
  */
-
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
  * Assert that architectural dependency rules are satisfied, e.g. that AVX is
  * supported if and only if SSE is supported.
  */
-#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)    \
-do {                                                                             \
-       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
-                                                                                 \
-       GUEST_ASSERT_3((__supported & (xfeatures)) != (xfeatures) ||              \
-                      __supported == ((xfeatures) | (dependencies)),             \
-                      __supported, (xfeatures), (dependencies));                 \
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)          \
+do {                                                                                   \
+       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies));       \
+                                                                                       \
+       __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) ||                    \
+                      __supported == ((xfeatures) | (dependencies)),                   \
+                      "supported = 0x%llx, xfeatures = 0x%llx, dependencies = 0x%llx", \
+                      __supported, (xfeatures), (dependencies));                       \
 } while (0)
 
 /*
@@ -41,7 +41,8 @@ do {                                                                            \
 do {                                                                   \
        uint64_t __supported = (supported_xcr0) & (xfeatures);          \
                                                                        \
-       GUEST_ASSERT_2(!__supported || __supported == (xfeatures),      \
+       __GUEST_ASSERT(!__supported || __supported == (xfeatures),      \
+                      "supported = 0x%llx, xfeatures = 0x%llx",        \
                       __supported, (xfeatures));                       \
 } while (0)
 
@@ -79,14 +80,18 @@ static void guest_code(void)
                                    XFEATURE_MASK_XTILE);
 
        vector = xsetbv_safe(0, supported_xcr0);
-       GUEST_ASSERT_2(!vector, supported_xcr0, vector);
+       __GUEST_ASSERT(!vector,
+                      "Expected success on XSETBV(0x%llx), got vector '0x%x'",
+                      supported_xcr0, vector);
 
        for (i = 0; i < 64; i++) {
                if (supported_xcr0 & BIT_ULL(i))
                        continue;
 
                vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
-               GUEST_ASSERT_3(vector == GP_VECTOR, supported_xcr0, vector, BIT_ULL(i));
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on XSETBV(0x%llx), supported XCR0 = %llx, got vector '0x%x'",
+                              BIT_ULL(i), supported_xcr0, vector);
        }
 
        GUEST_DONE();
@@ -117,7 +122,7 @@ int main(int argc, char *argv[])
 
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_3(uc, "0x%lx 0x%lx 0x%lx");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
index c94cde3..e149d05 100644 (file)
@@ -108,16 +108,16 @@ int main(int argc, char *argv[])
                vcpu_run(vcpu);
 
                if (run->exit_reason == KVM_EXIT_XEN) {
-                       ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
-                       ASSERT_EQ(run->xen.u.hcall.cpl, 0);
-                       ASSERT_EQ(run->xen.u.hcall.longmode, 1);
-                       ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
-                       ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
-                       ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
-                       ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
-                       ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
-                       ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
-                       ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+                       TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
                        run->xen.u.hcall.result = RETVALUE;
                        continue;
                }
index 83d5655..2515943 100644 (file)
@@ -113,7 +113,7 @@ static bool supports_filesystem(const char *const filesystem)
 {
        char str[32];
        int len;
-       bool res;
+       bool res = true;
        FILE *const inf = fopen("/proc/filesystems", "r");
 
        /*
@@ -125,14 +125,16 @@ static bool supports_filesystem(const char *const filesystem)
 
        /* filesystem can be null for bind mounts. */
        if (!filesystem)
-               return true;
+               goto out;
 
        len = snprintf(str, sizeof(str), "nodev\t%s\n", filesystem);
        if (len >= sizeof(str))
                /* Ignores too-long filesystem names. */
-               return true;
+               goto out;
 
        res = fgrep(inf, str);
+
+out:
        fclose(inf);
        return res;
 }
index d328af4..e7d2a53 100755 (executable)
@@ -12,7 +12,8 @@ ksft_skip=4
 TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
        ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
        ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
-       ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test"
+       ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
+       ipv4_mpath_list ipv6_mpath_list"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -2352,6 +2353,156 @@ ipv4_bcast_neigh_test()
        cleanup
 }
 
+mpath_dep_check()
+{
+       if [ ! -x "$(command -v mausezahn)" ]; then
+               echo "mausezahn command not found. Skipping test"
+               return 1
+       fi
+
+       if [ ! -x "$(command -v jq)" ]; then
+               echo "jq command not found. Skipping test"
+               return 1
+       fi
+
+       if [ ! -x "$(command -v bc)" ]; then
+               echo "bc command not found. Skipping test"
+               return 1
+       fi
+
+       if [ ! -x "$(command -v perf)" ]; then
+               echo "perf command not found. Skipping test"
+               return 1
+       fi
+
+       perf list fib:* | grep -q fib_table_lookup
+       if [ $? -ne 0 ]; then
+               echo "IPv4 FIB tracepoint not found. Skipping test"
+               return 1
+       fi
+
+       perf list fib6:* | grep -q fib6_table_lookup
+       if [ $? -ne 0 ]; then
+               echo "IPv6 FIB tracepoint not found. Skipping test"
+               return 1
+       fi
+
+       return 0
+}
+
+link_stats_get()
+{
+       local ns=$1; shift
+       local dev=$1; shift
+       local dir=$1; shift
+       local stat=$1; shift
+
+       ip -n $ns -j -s link show dev $dev \
+               | jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
+}
+
+list_rcv_eval()
+{
+       local file=$1; shift
+       local expected=$1; shift
+
+       local count=$(tail -n 1 $file | jq '.["counter-value"] | tonumber | floor')
+       local ratio=$(echo "scale=2; $count / $expected" | bc -l)
+       local res=$(echo "$ratio >= 0.95" | bc)
+       [[ $res -eq 1 ]]
+       log_test $? 0 "Multipath route hit ratio ($ratio)"
+}
+
+ipv4_mpath_list_test()
+{
+       echo
+       echo "IPv4 multipath list receive tests"
+
+       mpath_dep_check || return 1
+
+       route_setup
+
+       set -e
+       run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+       run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+       run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+       run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+       run_cmd "ip -n ns2 link add name nh1 up type dummy"
+       run_cmd "ip -n ns2 link add name nh2 up type dummy"
+       run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1"
+       run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2"
+       run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+       run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+       run_cmd "ip -n ns2 route add 203.0.113.0/24
+               nexthop via 172.16.201.2 nexthop via 172.16.202.2"
+       run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+       set +e
+
+       local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+       local tmp_file=$(mktemp)
+       local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac
+               -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"
+
+       # Packets forwarded in a list using a multipath route must not reuse a
+       # cached result so that a flow always hits the same nexthop. In other
+       # words, the FIB lookup tracepoint needs to be triggered for every
+       # packet.
+       local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+       run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+       local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+       local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+       list_rcv_eval $tmp_file $diff
+
+       rm $tmp_file
+       route_cleanup
+}
+
+ipv6_mpath_list_test()
+{
+       echo
+       echo "IPv6 multipath list receive tests"
+
+       mpath_dep_check || return 1
+
+       route_setup
+
+       set -e
+       run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+       run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+       run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+       run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+       run_cmd "ip -n ns2 link add name nh1 up type dummy"
+       run_cmd "ip -n ns2 link add name nh2 up type dummy"
+       run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1"
+       run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2"
+       run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+       run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+       run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64
+               nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2"
+       run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
+       set +e
+
+       local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+       local tmp_file=$(mktemp)
+       local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac
+               -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"
+
+       # Packets forwarded in a list using a multipath route must not reuse a
+       # cached result so that a flow always hits the same nexthop. In other
+       # words, the FIB lookup tracepoint needs to be triggered for every
+       # packet.
+       local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+       run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+       local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+       local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+       list_rcv_eval $tmp_file $diff
+
+       rm $tmp_file
+       route_cleanup
+}
+
 ################################################################################
 # usage
 
@@ -2433,6 +2584,8 @@ do
        ipv6_mangle)                    ipv6_mangle_test;;
        ipv4_bcast_neigh)               ipv4_bcast_neigh_test;;
        fib6_gc_test|ipv6_gc)           fib6_gc_test;;
+       ipv4_mpath_list)                ipv4_mpath_list_test;;
+       ipv6_mpath_list)                ipv6_mpath_list_test;;
 
        help) echo "Test names: $TESTS"; exit 0;;
        esac
index b74916d..484d087 100644 (file)
@@ -62,9 +62,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
 config KVM_VFIO
        bool
 
-config HAVE_KVM_ARCH_TLB_FLUSH_ALL
-       bool
-
 config HAVE_KVM_INVALID_WAKEUPS
        bool
 
index 2500178..486800a 100644 (file)
@@ -345,7 +345,6 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 }
 EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
 
-#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
        ++kvm->stat.generic.remote_tlb_flush_requests;
@@ -361,12 +360,38 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
         * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
         * barrier here.
         */
-       if (!kvm_arch_flush_remote_tlb(kvm)
+       if (!kvm_arch_flush_remote_tlbs(kvm)
            || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.generic.remote_tlb_flush;
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
-#endif
+
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
+{
+       if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
+               return;
+
+       /*
+        * Fall back to a full TLB flush if the architecture's range-based
+        * TLB invalidation is unsupported or can't be performed for whatever
+        * reason.
+        */
+       kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+                                  const struct kvm_memory_slot *memslot)
+{
+       /*
+        * All current use cases for flushing the TLBs for a specific memslot
+        * are related to dirty logging, and many do the TLB flush outside of
+        * mmu_lock. Operations on a memslot must be serialized by slots_lock
+        * to ensure the TLB flush from one operation is observed by any other
+        * operation on the same memslot.
+        */
+       lockdep_assert_held(&kvm->slots_lock);
+       kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
+}
 
 static void kvm_flush_shadow_all(struct kvm *kvm)
 {
@@ -526,7 +551,7 @@ typedef void (*on_unlock_fn_t)(struct kvm *kvm);
 struct kvm_hva_range {
        unsigned long start;
        unsigned long end;
-       pte_t pte;
+       union kvm_mmu_notifier_arg arg;
        hva_handler_t handler;
        on_lock_fn_t on_lock;
        on_unlock_fn_t on_unlock;
@@ -547,6 +572,8 @@ static void kvm_null_fn(void)
 }
 #define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
 
+static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG;
+
 /* Iterate over each memslot intersecting [start, last] (inclusive) range */
 #define kvm_for_each_memslot_in_hva_range(node, slots, start, last)         \
        for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
@@ -591,7 +618,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
                         * bother making these conditional (to avoid writes on
                         * the second or later invocation of the handler).
                         */
-                       gfn_range.pte = range->pte;
+                       gfn_range.arg = range->arg;
                        gfn_range.may_block = range->may_block;
 
                        /*
@@ -632,14 +659,14 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
                                                unsigned long start,
                                                unsigned long end,
-                                               pte_t pte,
+                                               union kvm_mmu_notifier_arg arg,
                                                hva_handler_t handler)
 {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
        const struct kvm_hva_range range = {
                .start          = start,
                .end            = end,
-               .pte            = pte,
+               .arg            = arg,
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
                .on_unlock      = (void *)kvm_null_fn,
@@ -659,7 +686,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
        const struct kvm_hva_range range = {
                .start          = start,
                .end            = end,
-               .pte            = __pte(0),
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
                .on_unlock      = (void *)kvm_null_fn,
@@ -693,6 +719,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                        pte_t pte)
 {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
+       const union kvm_mmu_notifier_arg arg = { .pte = pte };
 
        trace_kvm_set_spte_hva(address);
 
@@ -708,7 +735,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
        if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
                return;
 
-       kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
+       kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
 }
 
 void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
@@ -747,7 +774,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
        const struct kvm_hva_range hva_range = {
                .start          = range->start,
                .end            = range->end,
-               .pte            = __pte(0),
                .handler        = kvm_unmap_gfn_range,
                .on_lock        = kvm_mmu_invalidate_begin,
                .on_unlock      = kvm_arch_guest_memory_reclaimed,
@@ -812,7 +838,6 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
        const struct kvm_hva_range hva_range = {
                .start          = range->start,
                .end            = range->end,
-               .pte            = __pte(0),
                .handler        = (void *)kvm_null_fn,
                .on_lock        = kvm_mmu_invalidate_end,
                .on_unlock      = (void *)kvm_null_fn,
@@ -845,7 +870,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 {
        trace_kvm_age_hva(start, end);
 
-       return kvm_handle_hva_range(mn, start, end, __pte(0), kvm_age_gfn);
+       return kvm_handle_hva_range(mn, start, end, KVM_MMU_NOTIFIER_NO_ARG,
+                                   kvm_age_gfn);
 }
 
 static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
@@ -2180,7 +2206,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
        }
 
        if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+               kvm_flush_remote_tlbs_memslot(kvm, memslot);
 
        if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
                return -EFAULT;
@@ -2297,7 +2323,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
        KVM_MMU_UNLOCK(kvm);
 
        if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+               kvm_flush_remote_tlbs_memslot(kvm, memslot);
 
        return 0;
 }